Training Reproducibility with W&B¶
Combine Deeplake's deterministic TQL queries with Weights & Biases experiment logging for full training reproducibility. The query string + dataset commit uniquely identifies the training data, and W&B captures everything else (hyperparameters, metrics, model weights).
Objective¶
Train a Faster R-CNN vehicle detector on the VisDrone dataset, filter out low-quality samples with TQL, log everything to W&B, and compare runs at different resolutions.
Prerequisites¶
pip install deeplake torch torchvision wandb albumentations
You also need a Deeplake API token and a W&B account (run wandb login). Set both credentials before running the code.
Complete Code¶
import math
import time
import numpy as np
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import wandb
from deeplake import Client
# --- Configuration ---
WANDB_PROJECT = "deeplake-vehicle-detection"  # W&B project grouping all runs
TRAIN_TABLE = "visdrone_train"  # Deeplake table names for the ingested VisDrone splits
VAL_TABLE = "visdrone_val"
# Input resolution fed to the detector (logged to W&B for run comparison).
WIDTH = 320
HEIGHT = 256
# Boxes with area below this (px^2) after augmentation are dropped by albumentations.
MIN_AREA = 128
BATCH_SIZE = 16
NUM_EPOCHS = 8
# Foreground vehicle classes kept from VisDrone.
# NOTE(review): assumes the deeplake pytorch transform maps these names to the
# integer labels the model receives — confirm the mapping against the schema.
CLASSES_OF_INTEREST = [
    "bicycle", "car", "van", "truck",
    "tricycle", "awning-tricycle", "bus", "motor",
]
# --- 1. Setup ---
client = Client()
# Skip if already ingested
# client.ingest(TRAIN_TABLE, {"_huggingface": "VisDrone/VisDrone2019-DET-train"})
# client.ingest(VAL_TABLE, {"_huggingface": "VisDrone/VisDrone2019-DET-val"})
# --- 2. Filter low-quality samples with TQL ---
ds_train = client.open_table(TRAIN_TABLE)
# Deterministic TQL filter: drop every image containing ANY car box narrower
# or shorter than 20 px. The query string (plus the dataset commit) uniquely
# identifies the training data, which is the basis of the reproducibility
# story in this guide.
# NOTE(review): assumes boxes are stored as [x, y, w, h], so columns 2/3 are
# width/height — confirm against the ingested schema.
train_view = ds_train.query(
    "SELECT * WHERE NOT ANY("
    " (boxes[:,2] < 20 OR boxes[:,3] < 20) AND labels = 'car'"
    ")"
)
ds_val = client.open_table(VAL_TABLE)
# Validation uses the full, unfiltered split so metrics stay comparable
# across runs that use different training filters.
val_view = ds_val.query("SELECT *")
# --- 3. Augmentation pipelines ---
# The bbox handling and normalization were previously duplicated verbatim in
# both pipelines; factored into shared factories so train/val cannot silently
# diverge when one copy is edited.

def _normalize():
    """ImageNet normalization stats expected by the torchvision ResNet-50 backbone."""
    return A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))

def _bbox_params():
    """Shared box handling: pascal_voc-format boxes; drop boxes smaller than
    MIN_AREA px^2 or less than 60% visible after the spatial transforms."""
    return A.BboxParams(
        format="pascal_voc",
        label_fields=["class_labels", "bbox_ids"],
        min_area=MIN_AREA,
        min_visibility=0.6,
    )

# Training: bbox-safe random crop plus light geometric/photometric jitter.
tform_train = A.Compose([
    A.RandomSizedBBoxSafeCrop(width=WIDTH, height=HEIGHT, erosion_rate=0.2),
    A.Rotate(limit=20, p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.5),
    _normalize(),
    ToTensorV2(),
], bbox_params=_bbox_params())

# Validation: deterministic resize with the same normalization, no augmentation.
tform_val = A.Compose([
    A.Resize(width=WIDTH, height=HEIGHT),
    _normalize(),
    ToTensorV2(),
], bbox_params=_bbox_params())
# --- 4. DataLoaders ---
def collate_fn(batch):
    """Transpose a list of (image, target) pairs into (images, targets) tuples.

    Detection samples have variable numbers of boxes, so the default stacking
    collate cannot be used.
    """
    return tuple(zip(*batch))

# BUGFIX: the collate function must be a named top-level function, not a
# lambda — with num_workers > 0 it is pickled and sent to worker processes,
# and lambdas are not picklable under the spawn start method (the default on
# Windows and macOS).
train_loader = DataLoader(
    train_view.pytorch(transform=tform_train),
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=8,
    collate_fn=collate_fn,
)
val_loader = DataLoader(
    val_view.pytorch(transform=tform_val),
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=8,
    collate_fn=collate_fn,
)
# --- 5. Model ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def get_model(num_classes):
    """Return a COCO-pretrained Faster R-CNN with a new box predictor.

    num_classes must INCLUDE the background class: torchvision detection
    heads reserve label 0 for background, so pass (foreground classes + 1).
    """
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

# BUGFIX: +1 for the implicit background class required by torchvision
# detection heads. Passing only len(CLASSES_OF_INTEREST) leaves no output slot
# for the last foreground class.
# NOTE(review): this assumes target labels are encoded 1..8 with 0 reserved
# for background — confirm the deeplake transform emits labels that way.
model = get_model(len(CLASSES_OF_INTEREST) + 1)
model.to(device)
# Standard torchvision detection fine-tuning recipe.
optimizer = torch.optim.SGD(
    model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005
)
# Decay the LR 10x at epoch 4, halfway through the 8-epoch schedule.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)
# --- 6. Train with W&B logging ---
# The config is the reproducibility record. The surrounding guide promises
# "query string + dataset commit + W&B config" provenance, so the exact TQL
# filter and table names are logged here — W&B does not capture them
# automatically.
config = {
    "width": WIDTH, "height": HEIGHT,
    "min_area": MIN_AREA, "batch_size": BATCH_SIZE,
    "epochs": NUM_EPOCHS,
    "train_table": TRAIN_TABLE,
    "val_table": VAL_TABLE,
    # Keep in sync with the query used to build train_view above.
    "tql_filter": (
        "SELECT * WHERE NOT ANY("
        " (boxes[:,2] < 20 OR boxes[:,3] < 20) AND labels = 'car'"
        ")"
    ),
}
run = wandb.init(project=WANDB_PROJECT, config=config)
# Log gradients and parameters every 50 optimizer steps for debugging.
wandb.watch(model, log="all", log_freq=50)
# Plot val_iou against epoch rather than the global step counter.
wandb.define_metric("epoch")
wandb.define_metric("val_iou", step_metric="epoch")
for epoch in range(NUM_EPOCHS):
    # --- Train ---
    model.train()  # train mode: the detection model returns losses, not predictions
    for i, batch in enumerate(train_loader):
        # batch is (images, targets) thanks to the zip-transpose collate_fn.
        images = [img.to(device) for img in batch[0]]
        targets = [{k: v.to(device) for k, v in t.items()} for t in batch[1]]
        # loss_dict contains the component losses (classifier, box regression,
        # RPN terms); the total loss is their sum.
        loss_dict = model(images, targets)
        loss = sum(loss_dict.values())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        wandb.log({"loss": loss.item()})  # per-batch loss on the global step axis
        if i % 50 == 0:
            print(f"Epoch {epoch}, Batch {i}, Loss: {loss.item():.4f}")
    lr_scheduler.step()  # StepLR: step once per epoch
    # --- Validate ---
    model.eval()  # eval mode: the model now returns per-image predictions
    ious = []
    with torch.no_grad():
        for batch in val_loader:
            images = [img.to(device) for img in batch[0]]
            ground_truths = [{k: v.to(device) for k, v in t.items()} for t in batch[1]]
            predictions = model(images)
            for pred, gt in zip(predictions, ground_truths):
                if len(pred["boxes"]) > 0 and len(gt["boxes"]) > 0:
                    # Per-image score: each predicted box's best IoU against any
                    # ground-truth box, averaged. A cheap proxy for detection
                    # quality — not mAP (ignores labels, scores, and unmatched
                    # ground truths).
                    iou = torchvision.ops.box_iou(pred["boxes"], gt["boxes"])
                    ious.append(iou.max(dim=1).values.mean().item())
                else:
                    # No predictions or no ground truth for this image: score 0.
                    ious.append(0.0)
    avg_iou = sum(ious) / len(ious) if ious else 0.0
    wandb.log({"epoch": epoch, "val_iou": avg_iou})  # plotted against epoch via define_metric
    print(f"Epoch {epoch}: avg IOU = {avg_iou:.4f}")
# --- 7. Save model artifact ---
# Persist the fine-tuned weights and attach them to the W&B run so the
# checkpoint is versioned alongside the run's metrics and config.
weights_file = "model_weights.pth"
torch.save(model.state_dict(), weights_file)
model_artifact = wandb.Artifact("vehicle_detector", type="model")
model_artifact.add_file(weights_file)
run.log_artifact(model_artifact)
run.finish()
print("Training complete. Check W&B for run comparison.")
Step-by-Step Breakdown¶
1. Filter with TQL¶
The VisDrone dataset includes many images where vehicles are tiny specks. For a parking-lot use case those are noise. A single TQL query removes all images where any car bounding box is smaller than 20 pixels:
train_view = ds_train.query(
"SELECT * WHERE NOT ANY("
" (boxes[:,2] < 20 OR boxes[:,3] < 20) AND labels = 'car'"
")"
)
This eliminates ~50% of samples and produces a cleaner training set.
2. Log everything to W&B¶
Initialize the W&B run with a config dict before the training loop starts. For full provenance, also record the dataset table names, the TQL query string, and the dataset commit ID in the config — W&B does not capture these from Deeplake automatically.
3. Compare runs¶
To improve results, change resolution and retrain:
# Run 1: WIDTH=160, HEIGHT=128, MIN_AREA=32 -> avg IOU ~0.29
# Run 2: WIDTH=320, HEIGHT=256, MIN_AREA=128 -> avg IOU ~0.37
W&B's run comparison dashboard shows the improvement side by side, with full data lineage for both runs.
4. Reproducibility guarantee¶
The combination of:
- TQL query string: deterministic filter
- Dataset commit ID: exact data version
- W&B config: all hyperparameters
means any run can be reproduced exactly. No external metadata store or DVC-style tracking needed.
What to try next¶
- Training with Data Lineage: data lineage without W&B.
- Evaluating Model Performance: per-sample loss analysis.
- GPU-Streaming Pipeline: optimize throughput for large-scale training.