Usage Examples

This page provides several complete examples demonstrating the main features of arch_eval.

Basic Training with MNIST

Train a simple CNN on MNIST using torchvision data.

import torch
import torch.nn as nn
import torch.nn.functional as F
from arch_eval import Trainer, TrainingConfig
from torchvision import transforms

# ---------- Model ----------
class SimpleCNN(nn.Module):
    """Two-conv CNN for 28x28 grayscale inputs, producing 10 class logits."""

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 32 -> 64 channels, 3x3 kernels, stride 1.
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Classifier head: 64 channels * 12 * 12 spatial = 9216 flat features.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        # Two convolutional stages, each followed by a ReLU.
        for conv in (self.conv1, self.conv2):
            x = F.relu(conv(x))
        # Single 2x2 max-pool, then flatten for the fully connected head.
        x = torch.flatten(F.max_pool2d(x, 2), 1)
        return self.fc2(F.relu(self.fc1(x)))

# ---------- Configuration ----------
# Standard MNIST normalization constants (dataset mean / std).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Pick the accelerator once so the config below stays readable.
device = "cuda" if torch.cuda.is_available() else "cpu"

config = TrainingConfig(
    dataset="mnist",
    dataset_params={"root": "./data", "split": "train", "download": True},
    transform=transform,
    training_args={"batch_size": 64, "learning_rate": 0.001, "num_epochs": 5},
    task="classification",
    device=device,
    realtime=True,                    # show live training curves
    save_plot=["loss", "accuracy"],   # save static plots when training ends
    log_to_wandb=False,
    seed=42,                          # reproducible runs
)

# ---------- Train ----------
model = SimpleCNN()
trainer = Trainer(model, config)
history = trainer.train()

print(f"Final validation accuracy: {history['val_accuracy'][-1]:.4f}")

Benchmarking Two MLP Variants

Compare a small and a large MLP on synthetic data.

import torch.nn as nn
from arch_eval import Benchmark, BenchmarkConfig

class MLP(nn.Module):
    """Feed-forward network 128 -> hidden -> 64 with a single ReLU."""

    def __init__(self, hidden=256):
        super().__init__()
        layers = [
            nn.Linear(128, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 64),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

# Candidate models: each entry pairs a display name with an instantiated model.
models = [
    {"name": "Small MLP", "model": MLP(hidden=128)},
    {"name": "Large MLP", "model": MLP(hidden=512)},
]

# Both models are trained on the same synthetic classification task and
# compared on the metrics listed in `compare_metrics`.
config = BenchmarkConfig(
    dataset="synthetic classification",
    dataset_params={
        "n_samples": 5000,
        "n_features": 128,
        "n_classes": 64,
        "n_informative": 64,
    },
    training_args={
        "batch_size": 32,
        "learning_rate": 0.001,
        "num_epochs": 10,
    },
    compare_metrics=["accuracy", "loss"],
    parallel=True,               # run two models concurrently
    use_processes=False,         # use threads (safe for CPU; for GPU, keep sequential)
    device="cpu",                # force CPU for this example
)

benchmark = Benchmark(models, config)
results = benchmark.run()
# Print the benchmark results summary.
print(results)

Using Callbacks – Early Stopping and Checkpointing

Train a model with early stopping and model checkpointing.

import torch.nn as nn  # required for the nn.Linear model below

from arch_eval import Trainer, TrainingConfig
from arch_eval import EarlyStopping, ModelCheckpoint

config = TrainingConfig(
    dataset="synthetic classification",
    dataset_params={"n_samples": 1000, "n_features": 20, "n_classes": 5},
    training_args={"num_epochs": 50, "batch_size": 32, "learning_rate": 0.01},
    task="classification",
    callbacks=[
        # Stop when validation loss has not improved for 5 consecutive epochs.
        EarlyStopping(monitor="val_loss", patience=5, mode="min"),
        # Keep only the best checkpoint, ranked by validation accuracy.
        ModelCheckpoint(
            filepath="./checkpoints/model-{epoch:02d}.pt",
            monitor="val_accuracy",
            save_best_only=True,
            mode="max"
        )
    ],
    checkpoint_dir="./checkpoints",
)

model = nn.Linear(20, 5)
trainer = Trainer(model, config)
history = trainer.train()

Custom Dataset from NumPy Arrays

Use your own data stored as NumPy arrays.

import numpy as np
import torch
from arch_eval import Trainer, TrainingConfig

# Generate random data: 1000 samples, 50 float32 features each.
X = np.random.randn(1000, 50).astype(np.float32)
# Binary label: 1 when a sample's feature sum is positive, else 0.
y = (X.sum(axis=1) > 0).astype(np.int64)

# Pass the raw arrays directly as a (data, targets) tuple.
config = TrainingConfig(
    dataset=(X, y),          # tuple (data, targets)
    training_args={"batch_size": 64, "learning_rate": 0.001, "num_epochs": 5},
    task="classification",
)

model = torch.nn.Linear(50, 2)   # 2 classes
trainer = Trainer(model, config)
trainer.train()

Distributed Training with DDP

A launch script intended to be run with torchrun; save it as train_ddp.py.

# train_ddp.py
import os  # required: RANK / WORLD_SIZE are read from the environment below

import torch
import torch.nn as nn
import torch.distributed as dist
from arch_eval import Trainer, TrainingConfig, DistributedBackend

class Model(nn.Module):
    """Minimal linear classifier: 128 features -> 10 class logits."""

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        return self.fc(x)

# Get rank and world size from environment (set by torchrun)
rank = int(os.environ["RANK"])
world_size = int(os.environ["WORLD_SIZE"])

config = TrainingConfig(
    dataset="synthetic classification",
    dataset_params={"n_samples": 10000, "n_features": 128, "n_classes": 10},
    training_args={"batch_size": 64, "num_epochs": 10, "learning_rate": 0.01},
    distributed_backend=DistributedBackend.DISTRIBUTED,
    distributed_world_size=world_size,
    distributed_rank=rank,
    # Optional: shard dataset so each GPU sees different samples
    dataset_shard={"num_shards": world_size, "shard_id": rank},
    device="cuda",
)

model = Model()
trainer = Trainer(model, config)
trainer.train()

Run with:

torchrun --nproc_per_node=2 train_ddp.py

Profiling and Video Recording

Enable the profiler and record a video of the loss curve.

import torch.nn as nn
from arch_eval import Trainer, TrainingConfig

config = TrainingConfig(
    dataset="synthetic classification",
    dataset_params={"n_samples": 1000, "n_features": 20, "n_classes": 5},
    training_args={"num_epochs": 5},
    profiler={
        "enabled": True,
        "activities": ["cpu", "cuda"],
        # Skip 1 step, warm up for 1, then record 2 active steps.
        "schedule": {"wait": 1, "warmup": 1, "active": 2},
        "trace_path": "./profiler_trace"
    },
    save_video=["loss"],          # record loss over time
    realtime=False,               # disable live window (optional)
)

model = nn.Linear(20, 5)
trainer = Trainer(model, config)
trainer.train()
# After training, a video file `loss.mp4` will be created (if ffmpeg is installed).

Using a HuggingFace Dataset

Load the IMDB dataset from Hugging Face and train a simple text classifier.

from datasets import load_dataset
from arch_eval import Trainer, TrainingConfig
import torch.nn as nn

# Load dataset — returns a mapping of splits; `dataset["train"]` is passed
# to the trainer further below.
dataset = load_dataset("imdb")

class TextClassifier(nn.Module):
    """Bag-of-embeddings classifier: mean-pooled token embeddings -> linear head."""

    def __init__(self, vocab_size=10000, embed_dim=128, num_classes=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.fc = nn.Linear(embed_dim, num_classes)

    def forward(self, input_ids):
        # input_ids: (batch, seq_len) integer token ids.
        pooled = self.embedding(input_ids).mean(dim=1)  # average pooling
        return self.fc(pooled)

config = TrainingConfig(
    dataset=dataset["train"],          # pass the dataset object
    dataset_streaming=False,           # set to True for IterableDataset
    training_args={"batch_size": 16, "num_epochs": 1},
    task="classification",
)

# Uses the classifier defaults defined above (vocab 10000, 2 output classes).
model = TextClassifier()
trainer = Trainer(model, config)
trainer.train()

(Note: This is a simplified example; real text classification requires proper tokenization and possibly a collate function.)

Custom Callback – Logging to File

Create a callback that writes metrics to a CSV file.

import csv
from arch_eval import Callback

class CSVLogger(Callback):
    """Callback that appends every logged metric dict to a CSV file."""

    def __init__(self, filename="log.csv"):
        self.filename = filename
        # The file is opened up-front; the header is written lazily on the
        # first log call because the metric names are not known until then.
        self.file = open(filename, "w", newline="")
        self.writer = None

    def on_log(self, trainer, metrics, step):
        if self.writer is None:
            # First call: derive the column set from the metrics dict.
            columns = ["step"] + list(metrics.keys())
            self.writer = csv.DictWriter(self.file, fieldnames=columns)
            self.writer.writeheader()
        self.writer.writerow({"step": step, **metrics})
        self.file.flush()  # keep the CSV readable while training is running

    def on_train_end(self, trainer):
        self.file.close()

# Use it: attach the logger via the callbacks list.
config = TrainingConfig(
    ...,  # placeholder for the remaining TrainingConfig arguments
    callbacks=[CSVLogger("training_log.csv")]
)

Using the Plugin System

Create a simple plugin that prints a message at the start of each epoch.

File: my_plugin.py (place it in your Python path)

from arch_eval.plugins import hook

@hook("on_epoch_start")
def epoch_start(trainer, epoch):
    """Plugin hook run at the start of each epoch; prints a status line."""
    print(f"Starting epoch {epoch}!")

Now run any training script; the plugin will be discovered automatically and the hook will execute.


These examples cover most of the library’s functionality. For further details, refer to the Guide and API Reference.