|
|
|
@ -36,36 +36,10 @@ class TrainingPreSplitInfo(BaseModel):
|
|
|
|
|
Configuration for training mode when data is pre-split (is_split is True).
|
|
|
|
|
Contains:
|
|
|
|
|
- train_dir, valid_dir, test_dir: Directories for training, validation, and testing data.
|
|
|
|
|
- train_size, valid_size, test_size: Data split ratios or counts.
|
|
|
|
|
- train_offset, valid_offset, test_offset: Offsets for respective splits.
|
|
|
|
|
"""
|
|
|
|
|
train_dir: str = "." # Directory for training data if data is pre-split
|
|
|
|
|
valid_dir: str = "" # Directory for validation data if data is pre-split
|
|
|
|
|
test_dir: str = "" # Directory for testing data if data is pre-split
|
|
|
|
|
train_size: Union[int, float] = 0.7 # Training data size (int for static, float in (0,1] for dynamic)
|
|
|
|
|
valid_size: Union[int, float] = 0.2 # Validation data size (int for static, float in (0,1] for dynamic)
|
|
|
|
|
test_size: Union[int, float] = 0.1 # Testing data size (int for static, float in (0,1] for dynamic)
|
|
|
|
|
train_offset: int = 0 # Offset for training data
|
|
|
|
|
valid_offset: int = 0 # Offset for validation data
|
|
|
|
|
test_offset: int = 0 # Offset for testing data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@field_validator("train_size", "valid_size", "test_size", mode="before")
@classmethod
def validate_sizes(cls, v: Union[int, float]) -> Union[int, float]:
    """
    Validates a raw size value (runs before Pydantic's own type coercion):
      - If provided as a float, must be in the closed range [0, 1].
      - If provided as an int, must be non-negative.

    Zero is accepted in both forms.

    Args:
        v: Raw input for train_size, valid_size or test_size.

    Returns:
        The input value, unchanged.

    Raises:
        ValueError: If v is a bool, is neither an int nor a float,
            is a float outside [0, 1], or is a negative int.
    """
    # bool is a subclass of int, so it must be rejected explicitly;
    # otherwise True/False would slip through the int branch as a size.
    if isinstance(v, bool) or not isinstance(v, (int, float)):
        raise ValueError("Size must be either an int or a float")

    if isinstance(v, float):
        # The comparison is also False for NaN, so NaN is rejected here.
        if not (0 <= v <= 1):
            raise ValueError("When provided as a float, size must be in the range [0, 1]")
    elif v < 0:
        raise ValueError("When provided as an int, size must be non-negative")

    return v
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DatasetTrainingConfig(BaseModel):
|
|
|
|
@ -75,6 +49,8 @@ class DatasetTrainingConfig(BaseModel):
|
|
|
|
|
- is_split: Determines whether data is pre-split.
|
|
|
|
|
- pre_split: Configuration for when data is NOT pre-split.
|
|
|
|
|
- split: Configuration for when data is pre-split.
|
|
|
|
|
- train_size, valid_size, test_size: Data split ratios or counts.
|
|
|
|
|
- train_offset, valid_offset, test_offset: Offsets for respective splits.
|
|
|
|
|
- Other training parameters: batch_size, num_epochs, val_freq, use_amp, pretrained_weights.
|
|
|
|
|
|
|
|
|
|
Both pre_split and split objects are always created, but only the one corresponding
|
|
|
|
@ -84,12 +60,37 @@ class DatasetTrainingConfig(BaseModel):
|
|
|
|
|
pre_split: TrainingPreSplitInfo = TrainingPreSplitInfo()
|
|
|
|
|
split: TrainingSplitInfo = TrainingSplitInfo()
|
|
|
|
|
|
|
|
|
|
train_size: Union[int, float] = 0.7 # Training data size (int for static, float in (0,1] for dynamic)
|
|
|
|
|
valid_size: Union[int, float] = 0.2 # Validation data size (int for static, float in (0,1] for dynamic)
|
|
|
|
|
test_size: Union[int, float] = 0.1 # Testing data size (int for static, float in (0,1] for dynamic)
|
|
|
|
|
train_offset: int = 0 # Offset for training data
|
|
|
|
|
valid_offset: int = 0 # Offset for validation data
|
|
|
|
|
test_offset: int = 0 # Offset for testing data
|
|
|
|
|
|
|
|
|
|
batch_size: int = 1 # Batch size for training
|
|
|
|
|
num_epochs: int = 100 # Number of training epochs
|
|
|
|
|
val_freq: int = 1 # Frequency of validation during training
|
|
|
|
|
use_amp: bool = False # Flag to use Automatic Mixed Precision (AMP)
|
|
|
|
|
pretrained_weights: str = "" # Path to pretrained weights for training
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@field_validator("train_size", "valid_size", "test_size", mode="before")
@classmethod
def validate_sizes(cls, v: Union[int, float]) -> Union[int, float]:
    """
    Validates a raw size value (runs before Pydantic's own type coercion):
      - If provided as a float, must be in the closed range [0, 1].
      - If provided as an int, must be non-negative.

    Zero is accepted in both forms.

    Args:
        v: Raw input for train_size, valid_size or test_size.

    Returns:
        The input value, unchanged.

    Raises:
        ValueError: If v is a bool, is neither an int nor a float,
            is a float outside [0, 1], or is a negative int.
    """
    # bool is a subclass of int, so it must be rejected explicitly;
    # otherwise True/False would slip through the int branch as a size.
    if isinstance(v, bool) or not isinstance(v, (int, float)):
        raise ValueError("Size must be either an int or a float")

    if isinstance(v, float):
        # The comparison is also False for NaN, so NaN is rejected here.
        if not (0 <= v <= 1):
            raise ValueError("When provided as a float, size must be in the range [0, 1]")
    elif v < 0:
        raise ValueError("When provided as an int, size must be non-negative")

    return v
|
|
|
|
|
|
|
|
|
|
@model_validator(mode="after")
|
|
|
|
|
def validate_split_info(self) -> "DatasetTrainingConfig":
|
|
|
|
|
"""
|
|
|
|
@ -211,18 +212,17 @@ class DatasetConfig(BaseModel):
|
|
|
|
|
if self.is_training:
|
|
|
|
|
if self.training is None:
|
|
|
|
|
raise ValueError("Training configuration must be provided when is_training is True")
|
|
|
|
|
# Check predictions_dir if training.split.test_dir and test_size are set
|
|
|
|
|
if self.training.pre_split.test_dir and self.training.pre_split.test_size > 0:
|
|
|
|
|
if not self.common.predictions_dir:
|
|
|
|
|
raise ValueError("predictions_dir must be provided when training.split.test_dir and test_size are non-zero")
|
|
|
|
|
if self.training.train_size == 0:
|
|
|
|
|
raise ValueError("train_size must be provided when is_training is True")
|
|
|
|
|
if self.training.test_size > 0 and not self.common.predictions_dir:
|
|
|
|
|
raise ValueError("predictions_dir must be provided when test_size is non-zero")
|
|
|
|
|
if self.common.predictions_dir and not os.path.exists(self.common.predictions_dir):
|
|
|
|
|
raise ValueError(f"Path for predictions_dir does not exist: {self.common.predictions_dir}")
|
|
|
|
|
else:
|
|
|
|
|
if self.testing is None:
|
|
|
|
|
raise ValueError("Testing configuration must be provided when is_training is False")
|
|
|
|
|
if self.testing.test_dir and self.testing.test_size > 0:
|
|
|
|
|
if not self.common.predictions_dir:
|
|
|
|
|
raise ValueError("predictions_dir must be provided when testing.test_dir and test_size are non-zero")
|
|
|
|
|
if self.testing.test_size > 0 and not self.common.predictions_dir:
|
|
|
|
|
raise ValueError("predictions_dir must be provided when test_size is non-zero")
|
|
|
|
|
if self.common.predictions_dir and not os.path.exists(self.common.predictions_dir):
|
|
|
|
|
raise ValueError(f"Path for predictions_dir does not exist: {self.common.predictions_dir}")
|
|
|
|
|
return self
|
|
|
|
|