PyTorch Guide 🔥
Complete guide to using MLPotion with PyTorch - the researcher's favorite framework!
Why PyTorch + MLPotion? 🤔
- Research-Friendly: Dynamic computation graphs, easy debugging
- Pythonic: Feels like native Python code
- Flexible: Full control over training loops
- Ecosystem: Huge community, extensive libraries
- MLPotion Benefits: Type-safe, modular components with consistent APIs
Installation 📥
poetry add mlpotion -E pytorch
This installs:
- torch>=2.0
- torchvision>=0.16
- All PyTorch-specific MLPotion components
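To confirm the extras resolved correctly, a quick sanity check in a Python shell (plain PyTorch calls, nothing MLPotion-specific):

import torch
import torchvision

print(torch.__version__)          # expect >= 2.0
print(torchvision.__version__)    # expect >= 0.16
print(torch.cuda.is_available())  # True if a CUDA build and GPU are present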
Quick Example 🚀
from mlpotion.frameworks.pytorch import (
CSVDataset,
CSVDataLoader,
ModelTrainer,
ModelTrainingConfig,
)
import torch
import torch.nn as nn
# Load data
dataset = CSVDataset("data.csv", label_name="target")
factory = CSVDataLoader(batch_size=32, shuffle=True)
dataloader = factory.load(dataset)
# Create model
class SimpleModel(nn.Module):
def __init__(self):
super().__init__()
self.layers = nn.Sequential(
nn.Linear(10, 64),
nn.ReLU(),
nn.Linear(64, 1)
)
def forward(self, x):
return self.layers(x)
model = SimpleModel()
# Train
trainer = ModelTrainer()
config = ModelTrainingConfig(
epochs=10,
learning_rate=0.001,
device="cuda" if torch.cuda.is_available() else "cpu"
)
result = trainer.train(model, dataloader, config)
print(f"Final loss: {result.metrics['loss']:.4f}")
Data Loading 📊
CSV Dataset
import torch
from mlpotion.frameworks.pytorch import CSVDataset
dataset = CSVDataset(
file_pattern="data.csv", # File path or pattern
label_name="target", # Label column name
column_names=None, # Auto-detect or specify
dtype=torch.float32, # Data type
)
# Use like any PyTorch dataset
print(f"Dataset size: {len(dataset)}")
features, label = dataset[0]
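Since CSVDataset supports len() and indexing like any map-style PyTorch dataset, the standard torch.utils.data utilities apply. A sketch of an 80/20 train/validation split with random_split (the seed of 42 is an arbitrary choice for reproducibility):

import torch
from torch.utils.data import random_split

n_train = int(0.8 * len(dataset))
train_dataset, val_dataset = random_split(
    dataset,
    [n_train, len(dataset) - n_train],
    generator=torch.Generator().manual_seed(42),  # reproducible split
)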
DataLoader Factory
from mlpotion.frameworks.pytorch import CSVDataLoader
factory = CSVDataLoader(
batch_size=32,
shuffle=True,
num_workers=4, # Parallel data loading
pin_memory=True, # Faster GPU transfer
drop_last=False,
persistent_workers=True, # Keep workers alive
)
# Create dataloaders
train_loader = factory.load(train_dataset)
val_loader = factory.load(val_dataset)
test_loader = factory.load(test_dataset)
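Assuming factory.load returns a standard torch.utils.data.DataLoader (the constructor arguments above mirror its signature), you can pull one batch directly to verify shapes before training:

# Inspect one batch
features, labels = next(iter(train_loader))
print(features.shape)  # e.g. torch.Size([32, 10]) for 10 input features
print(labels.shape)    # e.g. torch.Size([32])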
Model Training 🎓
Basic Training
from mlpotion.frameworks.pytorch import ModelTrainer, ModelTrainingConfig
config = ModelTrainingConfig(
epochs=10,
learning_rate=0.001,
device="cuda",
optimizer="adam",
loss_fn="mse",
verbose=True,
)
trainer = ModelTrainer()
result = trainer.train(model, train_loader, config)
print(f"Training time: {result.training_time:.2f}s")
print(f"Final loss: {result.metrics['loss']:.4f}")
Advanced Training Configuration
import torch
import torch.nn as nn
# Using string optimizer name
config = ModelTrainingConfig(
epochs=100,
learning_rate=0.001,
device="cuda" if torch.cuda.is_available() else "cpu",
optimizer="adamw", # String name
loss_fn="mse",
verbose=True,
)
# Using custom optimizer instance
custom_optimizer = torch.optim.Adam(
model.parameters(),
lr=0.001,
betas=(0.9, 0.999),
weight_decay=0.01,
amsgrad=True,
)
config = ModelTrainingConfig(
epochs=100,
device="cuda",
optimizer=custom_optimizer, # Pass optimizer instance
loss_fn="mse",
)
# Using custom loss function
custom_loss = nn.SmoothL1Loss(beta=1.0)
config = ModelTrainingConfig(
epochs=100,
learning_rate=0.001,
optimizer="adam",
loss_fn=custom_loss, # Custom loss instance
)
result = trainer.train(model, train_loader, config, validation_dataloader=val_loader)
Callbacks and TensorBoard
# Custom callback class
class TrainingCallback:
def on_train_begin(self):
print("🚀 Training started!")
def on_epoch_end(self, epoch, metrics):
print(f"Epoch {epoch + 1} completed: {metrics}")
# Add custom logic (e.g., save checkpoint, adjust LR)
def on_train_end(self):
print("✅ Training completed!")
# Early stopping callback example
class EarlyStopping:
def __init__(self, patience=5, min_delta=0.001):
self.patience = patience
self.min_delta = min_delta
self.best_loss = float('inf')
self.counter = 0
self.should_stop = False
def on_epoch_end(self, epoch, metrics):
val_loss = metrics.get('val_loss')
if val_loss is None:
return
if val_loss < self.best_loss - self.min_delta:
self.best_loss = val_loss
self.counter = 0
else:
self.counter += 1
if self.counter >= self.patience:
print(f"Early stopping triggered at epoch {epoch + 1}")
self.should_stop = True
# Configure with callbacks and TensorBoard
config = ModelTrainingConfig(
epochs=100,
learning_rate=0.001,
optimizer="adam",
loss_fn="mse",
# Add callbacks
callbacks=[
TrainingCallback(),
EarlyStopping(patience=10),
],
# Enable TensorBoard
use_tensorboard=True,
tensorboard_log_dir="logs/pytorch_experiment",
tensorboard_params={
"comment": "My experiment",
"flush_secs": 30,
},
)
trainer = ModelTrainer()
result = trainer.train(model, train_loader, config, validation_dataloader=val_loader)
# View TensorBoard logs
# tensorboard --logdir=logs/pytorch_experiment
Custom Loss Functions
# Method 1: Use nn.Module
class CustomLoss(nn.Module):
def __init__(self, alpha=0.5):
super().__init__()
self.alpha = alpha
def forward(self, predictions, targets):
mse = nn.functional.mse_loss(predictions, targets)
mae = nn.functional.l1_loss(predictions, targets)
return self.alpha * mse + (1 - self.alpha) * mae
# Method 2: Use callable function
def custom_loss_fn(predictions, targets):
return torch.mean((predictions - targets) ** 2) + 0.1 * torch.mean(torch.abs(predictions - targets))
# Use in config
config = ModelTrainingConfig(
epochs=50,
learning_rate=0.001,
optimizer="adam",
loss_fn=CustomLoss(alpha=0.7), # or custom_loss_fn
)
Custom Training Loop
from mlpotion.frameworks.pytorch import ModelTrainer
import torch
class CustomTrainer(ModelTrainer):
def training_step(self, model, batch, device):
"""Custom training step logic."""
features, labels = batch
features, labels = features.to(device), labels.to(device)
# Forward pass
predictions = model(features)
loss = self.criterion(predictions, labels)
# Add custom regularization
l2_reg = sum(p.pow(2).sum() for p in model.parameters())
loss = loss + 0.001 * l2_reg
return loss
def validation_step(self, model, batch, device):
"""Custom validation step logic."""
features, labels = batch
features, labels = features.to(device), labels.to(device)
with torch.no_grad():
predictions = model(features)
loss = self.criterion(predictions, labels)
return loss
# Use custom trainer
custom_trainer = CustomTrainer()
result = custom_trainer.train(model, train_loader, config)
Model Evaluation 📊
from mlpotion.frameworks.pytorch import ModelEvaluator, ModelEvaluationConfig
config = ModelEvaluationConfig(
device="cuda",
batch_size=32,
metrics=["mse", "mae"],
verbose=True,
)
evaluator = ModelEvaluator()
result = evaluator.evaluate(model, test_loader, config)
print(f"Test loss: {result.metrics['loss']:.4f}")
print(f"Test MAE: {result.metrics['mae']:.4f}")
Model Persistence 💾
from mlpotion.frameworks.pytorch import ModelPersistence
persistence = ModelPersistence(path="models/my_model.pth", model=model)
# Save model (state_dict - recommended)
persistence.save()
# Save full model (use a separate path so the state_dict file is not overwritten)
persistence_full = ModelPersistence(path="models/my_model_full.pth", model=model)
persistence_full.save(save_full_model=True)
# Load model
loader = ModelPersistence(path="models/my_model.pth")
loaded_model, metadata = loader.load(
model_class=SimpleModel, # Need model class for state_dict
)
# Load full model (auto-detected)
loader_full = ModelPersistence(path="models/my_model_full.pth")
loaded_model_full, _ = loader_full.load()
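For reference, the state_dict route corresponds to PyTorch's standard persistence calls. A minimal sketch of the raw equivalent (ModelPersistence additionally handles the metadata shown above; "models/raw_model.pth" is an illustrative path):

import torch

# Save only the learned parameters (the recommended, portable form)
torch.save(model.state_dict(), "models/raw_model.pth")

# Loading requires instantiating the architecture first
restored = SimpleModel()
restored.load_state_dict(torch.load("models/raw_model.pth"))
restored.eval()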
Model Export 📤
TorchScript
import torch
from mlpotion.frameworks.pytorch import ModelExporter, ModelExportConfig
exporter = ModelExporter()
config = ModelExportConfig(
format="torchscript",
method="trace", # or "script"
example_inputs=torch.randn(1, 10), # For tracing
)
result = exporter.export(model, "exports/model.pt", config)
print(f"Exported to: {result.export_path}")
ONNX
config = ModelExportConfig(
format="onnx",
input_names=["features"],
output_names=["predictions"],
dynamic_axes={"features": {0: "batch_size"}},
opset_version=14,
)
result = exporter.export(model, "exports/model.onnx", config)
Common Patterns 🎯
Pattern: Train-Val-Test Pipeline
# Load data
train_dataset = CSVDataset("train.csv", label_name="target")
val_dataset = CSVDataset("val.csv", label_name="target")
test_dataset = CSVDataset("test.csv", label_name="target")
# Create dataloaders
factory = CSVDataLoader(batch_size=32, shuffle=True, num_workers=4)
train_loader = factory.load(train_dataset)
val_loader = factory.load(val_dataset)
test_loader = factory.load(test_dataset)
# Train with validation
trainer = ModelTrainer()
config = ModelTrainingConfig(
epochs=50,
learning_rate=0.001,
early_stopping=True,
early_stopping_patience=10,
)
result = trainer.train(model, train_loader, config, validation_dataloader=val_loader)
# Evaluate on test set (the evaluator takes a ModelEvaluationConfig, not the training config)
evaluator = ModelEvaluator()
eval_config = ModelEvaluationConfig(device="cuda", batch_size=32, metrics=["mse", "mae"])
test_metrics = evaluator.evaluate(result.model, test_loader, eval_config)
print(f"Best epoch: {result.best_epoch}")
print(f"Test loss: {test_metrics.metrics['loss']:.4f}")
Pattern: Multi-GPU Training
import torch
import torch.nn as nn
# Wrap model for multi-GPU
if torch.cuda.device_count() > 1:
model = nn.DataParallel(model)
device = "cuda"
else:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
# Train as usual
config = ModelTrainingConfig(
epochs=50,
learning_rate=0.001,
device=device,
)
result = trainer.train(model, train_loader, config)
Pattern: Mixed Precision Training
config = ModelTrainingConfig(
epochs=50,
learning_rate=0.001,
use_amp=True, # Enable automatic mixed precision
device="cuda",
)
result = trainer.train(model, train_loader, config)
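Roughly, use_amp=True corresponds to PyTorch's standard autocast/GradScaler pattern. A sketch of the equivalent plain loop, assuming model, optimizer, and criterion are already defined (MLPotion's internal implementation may differ):

import torch

scaler = torch.cuda.amp.GradScaler()
for features, labels in train_loader:
    features, labels = features.to("cuda"), labels.to("cuda")
    optimizer.zero_grad()
    with torch.autocast(device_type="cuda"):  # forward pass in mixed precision
        loss = criterion(model(features), labels)
    scaler.scale(loss).backward()  # scale loss to avoid fp16 underflow
    scaler.step(optimizer)         # unscale gradients, then step
    scaler.update()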
Best Practices 💡
- Use DataLoaders: Always use CSVDataLoader for efficient loading
- Enable num_workers: Set num_workers > 0 for parallel data loading
- Pin Memory: Use pin_memory=True for faster GPU transfer
- AMP Training: Enable mixed precision for faster training
- Gradient Clipping: Prevent exploding gradients with clip_grad_norm (see the sketch below)
- State Dict: Save models as state_dict for better compatibility
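For gradient clipping inside a custom training step (see CustomTrainer above), the standard PyTorch utility slots in between the backward pass and the optimizer step:

import torch

loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # cap gradient norm
optimizer.step()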
Next Steps 🚀
- TensorFlow Guide → Compare with TensorFlow
- ZenML Integration → Add MLOps
- API Reference → Detailed API docs
PyTorch + MLPotion = Research + Production! 🔥