13
Neural Networks
Autograd
Training
Neural Network Training
This example demonstrates the full neural network training loop in Deepbox. You build a Sequential model (Linear → ReLU → Linear), create training data as GradTensors via parameter(), define an Adam optimizer, and run a training loop: forward pass → compute loss → zeroGrad() → backward() → optimizer.step(). You also learn custom Module subclasses, SGD, and inference mode.
Deepbox Modules Used
deepbox/ndarray, deepbox/nn, deepbox/optim
What You Will Learn
- Build Sequential models from Linear, ReLU, and other layers
- Use parameter() to create GradTensors that track gradients
- Training loop: forward → loss → zeroGrad → backward → optimizer.step
- Use Adam for fast convergence; SGD with momentum for fine-tuning
- Switch to plain tensor .forward() for inference (no graph overhead)
Source Code
13-neural-network-training/index.ts
1import { isNumericTypedArray, isTypedArray } from "deepbox/core";2import { GradTensor, parameter, type Tensor, tensor } from "deepbox/ndarray";3import { Linear, Module, mseLoss, ReLU, Sequential } from "deepbox/nn";4import { Adam, SGD } from "deepbox/optim";56console.log("=== Neural Network Training ===\n");78// Helper to read a scalar value from tensor data9const scalarValue = (t: Tensor): number => {10 const d = t.data;11 if (!isTypedArray(d) || !isNumericTypedArray(d)) return NaN;12 return Number(d[t.offset] ?? 0);13};1415// ---------------------------------------------------------------------------16// Part 1: Sequential model with autograd training17// ---------------------------------------------------------------------------18console.log("--- Part 1: Sequential Model with Autograd ---");1920const model = new Sequential(new Linear(2, 16), new ReLU(), new Linear(16, 1));2122const paramCount = Array.from(model.parameters()).length;23console.log("Model parameters:", paramCount);2425// Training data: y = x0 + 2*x126const X = parameter([27 [1, 0],28 [0, 1],29 [1, 1],30 [2, 1],31 [1, 2],32 [3, 1],33 [2, 2],34 [0, 3],35]);36const yTargets = parameter([[1], [2], [3], [4], [5], [5], [6], [6]]);3738const optimizer = new Adam(model.parameters(), { lr: 0.01 });3940console.log("Training for 200 epochs...");41for (let epoch = 0; epoch < 200; epoch++) {42 // Forward pass with GradTensor builds the computation graph43 const pred = model.forward(X);4445 // Compute MSE loss using GradTensor ops (tracks gradients)46 if (!(pred instanceof GradTensor)) throw new Error("Expected GradTensor from forward");47 const diff = pred.sub(yTargets);48 const loss = diff.mul(diff).mean();4950 // Backward pass and optimize51 optimizer.zeroGrad();52 loss.backward();53 optimizer.step();5455 if (epoch % 50 === 0) {56 console.log(` Epoch ${epoch}: loss = ${scalarValue(loss.tensor).toFixed(6)}`);57 }58}5960// Evaluate using plain Tensor forward pass (no gradient tracking)61const finalPred = 
model.forward(X.tensor);62console.log("Predictions:", finalPred.toString());63console.log("Targets: ", yTargets.tensor.toString());6465// ---------------------------------------------------------------------------66// Part 2: Custom Module67// ---------------------------------------------------------------------------68console.log("\n--- Part 2: Custom Module ---");6970class TwoLayerNet extends Module {71 fc1: Linear;72 relu: ReLU;73 fc2: Linear;7475 constructor(inputDim: number, hiddenDim: number, outputDim: number) {76 super();77 this.fc1 = new Linear(inputDim, hiddenDim);78 this.relu = new ReLU();79 this.fc2 = new Linear(hiddenDim, outputDim);80 this.registerModule("fc1", this.fc1);81 this.registerModule("relu", this.relu);82 this.registerModule("fc2", this.fc2);83 }8485 override forward(x: GradTensor): GradTensor;86 override forward(x: Tensor): Tensor;87 override forward(x: Tensor | GradTensor): Tensor | GradTensor {88 if (x instanceof GradTensor) {89 let out: GradTensor = this.fc1.forward(x);90 out = this.relu.forward(out);91 return this.fc2.forward(out);92 }93 let out: Tensor = this.fc1.forward(x);94 out = this.relu.forward(out);95 return this.fc2.forward(out);96 }97}9899const net = new TwoLayerNet(2, 8, 1);100const netParamCount = Array.from(net.parameters()).length;101console.log("Custom module parameters:", netParamCount);102103// Train/eval mode104net.train();105console.log("Training mode:", net.training);106net.eval();107console.log("Eval mode:", net.training);108109// State dict for serialization110const state = net.stateDict();111console.log("State dict keys:", Object.keys(state.parameters).join(", "));112113// ---------------------------------------------------------------------------114// Part 3: Plain Tensor forward pass with mseLoss115// ---------------------------------------------------------------------------116console.log("\n--- Part 3: Plain Tensor Forward + mseLoss ---");117118const inputTensor = tensor([119 [1, 0],120 [0, 1],121 [1, 1],122 
[2, 1],123]);124const targetTensor = tensor([[1], [2], [3], [4]]);125126// Forward pass with plain Tensors — no autograd, just inference127const rawOutput = model.forward(inputTensor);128const output = rawOutput instanceof GradTensor ? rawOutput.tensor : rawOutput;129const evalLoss = mseLoss(output, targetTensor);130console.log("Eval loss (plain Tensor):", scalarValue(evalLoss).toFixed(6));131132// ---------------------------------------------------------------------------133// Part 4: SGD with momentum134// ---------------------------------------------------------------------------135console.log("\n--- Part 4: SGD with Momentum ---");136137const sgdModel = new Sequential(new Linear(2, 8), new ReLU(), new Linear(8, 1));138const sgdOptimizer = new SGD(sgdModel.parameters(), {139 lr: 0.01,140 momentum: 0.9,141});142143for (let epoch = 0; epoch < 100; epoch++) {144 const pred = sgdModel.forward(X);145 if (!(pred instanceof GradTensor)) throw new Error("Expected GradTensor from forward");146 const diff = pred.sub(yTargets);147 const loss = diff.mul(diff).mean();148 sgdOptimizer.zeroGrad();149 loss.backward();150 sgdOptimizer.step();151}152153const rawSgdPred = sgdModel.forward(X.tensor);154const sgdPred = rawSgdPred instanceof GradTensor ? rawSgdPred.tensor : rawSgdPred;155const sgdLoss = mseLoss(sgdPred, yTargets.tensor);156console.log("SGD final loss:", scalarValue(sgdLoss).toFixed(6));157158console.log("\n=== Neural Network Training Complete ===");Console Output
$ npx tsx 13-neural-network-training/index.ts
=== Neural Network Training ===
--- Part 1: Sequential Model with Autograd ---
Model parameters: 4
Training for 200 epochs...
Epoch 0: loss = 21.909481
Epoch 50: loss = 0.114061
Epoch 100: loss = 0.022785
Epoch 150: loss = 0.015727
Predictions: tensor([[0.8928]
[2.203]
[3.119]
...
[5.016]
[5.901]
[5.940]], dtype=float32)
Targets: tensor([[1]
[2]
[3]
...
[5]
[6]
[6]], dtype=float32)
--- Part 2: Custom Module ---
Custom module parameters: 4
Training mode: true
Eval mode: false
State dict keys: fc1.weight, fc1.bias, fc2.weight, fc2.bias
--- Part 3: Plain Tensor Forward + mseLoss ---
Eval loss (plain Tensor): 0.017774
--- Part 4: SGD with Momentum ---
SGD final loss: 0.004190
=== Neural Network Training Complete ===