Matrix Mathematics
Six coding challenges covering the linear algebra operations that underpin all of machine learning: dot products for similarity, matrix multiplication for neural network layers, and decompositions (eigenvalue, SVD) for dimensionality reduction.
Challenge 1: Dot Product for Similarity
Problem: Given a query embedding q of shape (D,) and a matrix of document embeddings docs of shape (N, D), compute the dot product similarity between the query and every document. Return the indices of the top-K most similar documents.
import numpy as np
# ---- CHALLENGE ----
def dot_product_retrieval(q, docs, k):
    """Return indices of top-k documents by dot product similarity with query.

    q: query embedding, shape (D,).
    docs: document embedding matrix, shape (N, D).
    k: number of results to return.
    Returns an array of k document indices, most similar first.
    """
    pass
# ---- SOLUTION ----
def dot_product_retrieval(q, docs, k):
    """Return indices of top-k documents by dot product similarity.

    Args:
        q: query embedding, shape (D,).
        docs: document embedding matrix, shape (N, D).
        k: number of results; clamped to [0, N].

    Returns:
        Integer array of at most k indices, ordered by descending similarity.
    """
    # (N, D) @ (D,) -> (N,) dot product with each doc
    similarities = docs @ q
    n = similarities.shape[0]
    # Guard edge cases the slicing tricks below get wrong: [-0:] returns the
    # WHOLE array for k == 0, and argpartition raises ValueError for k > n.
    k = min(k, n)
    if k <= 0:
        return np.empty(0, dtype=np.intp)
    # np.argpartition is O(n) vs O(n log n) for a full sort.
    # It partitions so that the k largest values sit in the last k positions.
    top_k_idx = np.argpartition(similarities, -k)[-k:]
    # Order the top-k by similarity (descending).
    return top_k_idx[np.argsort(similarities[top_k_idx])[::-1]]
# ---- VERIFICATION ----
# Seed so the random embeddings (and thus the test) are reproducible.
# NOTE: the randn call order below determines the values — do not reorder.
np.random.seed(42)
q = np.random.randn(128)            # query embedding, shape (128,)
docs = np.random.randn(10000, 128)  # 10k document embeddings
# Make doc 42 very similar to query: a scaled copy plus tiny noise.
docs[42] = q * 2 + np.random.randn(128) * 0.01
top_5 = dot_product_retrieval(q, docs, 5)
assert 42 in top_5, "Doc 42 should be in top 5"
assert top_5[0] == 42, "Doc 42 should be the most similar"
print("Challenge 1 passed!")
Challenge 2: Neural Network Forward Pass
Problem: Implement a two-layer neural network forward pass using only NumPy matrix operations. Given input X of shape (N, D), weights W1 of shape (D, H), bias b1 of shape (H,), weights W2 of shape (H, C), and bias b2 of shape (C,), compute the output logits. Use ReLU activation between layers.
import numpy as np
# ---- CHALLENGE ----
def forward_pass(X, W1, b1, W2, b2):
    """Two-layer neural network: X -> Linear -> ReLU -> Linear -> logits.

    X: (N, D) input batch.  W1: (D, H), b1: (H,).  W2: (H, C), b2: (C,).
    Returns logits of shape (N, C).
    """
    pass
# ---- SOLUTION ----
def forward_pass(X, W1, b1, W2, b2):
    """Two-layer neural network forward pass.

    Computes X -> Linear(W1, b1) -> ReLU -> Linear(W2, b2).

    Args:
        X: input batch, shape (N, D).
        W1, b1: first layer weights (D, H) and bias (H,).
        W2, b2: second layer weights (H, C) and bias (C,).

    Returns:
        Logits of shape (N, C).
    """
    # Hidden pre-activation: (N, D) @ (D, H) + (H,) broadcasts to (N, H).
    hidden = X @ W1 + b1
    # ReLU: clamp negatives to zero, element-wise.
    hidden = np.maximum(0, hidden)
    # Output layer: (N, H) @ (H, C) + (C,) -> (N, C).
    return hidden @ W2 + b2
# ---- VERIFICATION ----
# MNIST-like dimensions: batch, input, hidden, classes.
N, D, H, C = 32, 784, 256, 10  # batch, input, hidden, classes
X = np.random.randn(N, D)
# Small init scale keeps pre-activations from blowing up.
W1 = np.random.randn(D, H) * 0.01
b1 = np.zeros(H)
W2 = np.random.randn(H, C) * 0.01
b2 = np.zeros(C)
logits = forward_pass(X, W1, b1, W2, b2)
# Only the output shape is checked — inputs are unseeded random.
assert logits.shape == (N, C), f"Expected ({N}, {C}), got {logits.shape}"
print("Challenge 2 passed!")
Challenge 3: Linear Regression Closed-Form
Problem: Implement the closed-form solution for linear regression using the normal equation: w = (X^T X)^{-1} X^T y. Handle the case where X^T X is singular by adding regularization (ridge regression): w = (X^T X + lambda * I)^{-1} X^T y.
import numpy as np
# ---- CHALLENGE ----
def linear_regression_closed_form(X, y, reg_lambda=0.0):
    """Solve linear regression using normal equation with optional L2 regularization.

    X: (N, D) design matrix.  y: (N,) targets.
    reg_lambda: ridge strength; 0.0 means plain least squares.
    Returns the (D,) weight vector.
    """
    pass
# ---- SOLUTION ----
def linear_regression_closed_form(X, y, reg_lambda=0.0):
    """Normal equation: w = (X^T X + lambda*I)^{-1} X^T y.

    Args:
        X: design matrix, shape (N, D).
        y: targets, shape (N,).
        reg_lambda: L2 (ridge) regularization strength; 0 disables it.

    Returns:
        Weight vector w of shape (D,).
    """
    D = X.shape[1]
    # X^T X: (D, N) @ (N, D) -> (D, D)
    XtX = X.T @ X
    if reg_lambda > 0:
        # Out-of-place add (not +=): with an integer design matrix, XtX is
        # integer-typed and in-place addition of a float matrix raises a
        # casting error (numpy 'same_kind' rule). Out-of-place upcasts safely.
        XtX = XtX + reg_lambda * np.eye(D)
    # X^T y: (D, N) @ (N,) -> (D,)
    Xty = X.T @ y
    # Solve the system (faster and more numerically stable than explicit inverse).
    return np.linalg.solve(XtX, Xty)
# ---- VERIFICATION ----
# Seed so the generated regression problem is reproducible.
np.random.seed(42)
N, D = 100, 5
X = np.random.randn(N, D)
# Ground-truth weights the solver should recover.
w_true = np.array([1.0, -2.0, 3.0, -4.0, 5.0])
y = X @ w_true + np.random.randn(N) * 0.1  # small noise
w_hat = linear_regression_closed_form(X, y, reg_lambda=0.01)
assert np.allclose(w_hat, w_true, atol=0.1), f"Weights should be close to true: {w_hat}"
# Test with predictions: in-sample MSE should be near the noise floor (0.1^2).
y_pred = X @ w_hat
mse = np.mean((y - y_pred) ** 2)
assert mse < 0.05, f"MSE too high: {mse}"
print("Challenge 3 passed!")
Challenge 4: Determinant and Matrix Properties
Problem: Given a covariance matrix, compute its determinant to check if the Gaussian distribution is well-defined. Implement a function that checks if a matrix is positive semi-definite (all eigenvalues >= 0), which is required for valid covariance matrices. Also compute the condition number to assess numerical stability.
import numpy as np
# ---- CHALLENGE ----
def analyze_covariance(cov_matrix):
    """Analyze a covariance matrix: determinant, PSD check, condition number.

    cov_matrix: symmetric (D, D) matrix.  Returns a dict of diagnostics.
    """
    pass
# ---- SOLUTION ----
def analyze_covariance(cov_matrix):
    """Analyze a covariance matrix for validity and numerical properties.

    Args:
        cov_matrix: symmetric (D, D) covariance matrix.

    Returns:
        dict with keys: "determinant", "eigenvalues" (ascending),
        "is_psd", "condition_number", "is_well_conditioned", "log_det".
    """
    det = np.linalg.det(cov_matrix)
    # eigvalsh exploits symmetry: guaranteed-real eigenvalues, faster than eigvals.
    eigenvalues = np.linalg.eigvalsh(cov_matrix)
    # Small negative tolerance absorbs floating-point round-off.
    is_psd = np.all(eigenvalues >= -1e-10)
    condition_number = np.linalg.cond(cov_matrix)
    is_well_conditioned = condition_number < 1e10
    # slogdet computes log|det| without forming det itself, so it stays finite
    # in high dimensions where det under/overflows and np.log(det) would
    # spuriously return -inf/inf (e.g. det = 0.01^300 underflows to 0.0).
    sign, logabsdet = np.linalg.slogdet(cov_matrix)
    return {
        "determinant": det,
        "eigenvalues": eigenvalues,
        "is_psd": is_psd,
        "condition_number": condition_number,
        "is_well_conditioned": is_well_conditioned,
        "log_det": logabsdet if sign > 0 else -np.inf,
    }
# ---- VERIFICATION ----
# Valid covariance matrix (from data) — NOTE: unseeded, but any sample
# covariance of full-rank random data is PSD with positive determinant.
data = np.random.randn(100, 3)
cov = np.cov(data.T)  # (3, 3) sample covariance
result = analyze_covariance(cov)
assert result["is_psd"], "Covariance from data should be PSD"
assert result["determinant"] > 0, "Valid covariance should have positive determinant"
# Singular matrix (rank-deficient)
singular = np.array([[1, 2], [2, 4]])  # row 2 = 2 * row 1
result2 = analyze_covariance(singular)
assert abs(result2["determinant"]) < 1e-10, "Singular matrix det should be ~0"
print("Challenge 4 passed!")
Challenge 5: PCA via Eigenvalue Decomposition
Problem: Implement Principal Component Analysis from scratch using eigenvalue decomposition. Given data X of shape (N, D), reduce it to k dimensions. Return the projected data, the principal components, and the explained variance ratio.
import numpy as np
# ---- CHALLENGE ----
def pca(X, k):
    """PCA: reduce X from (N, D) to (N, k). Return projected data,
    principal components, and explained variance ratio.

    X: (N, D) data matrix.  k: number of components to keep.
    Returns (X_projected (N, k), components (D, k), explained_var_ratio (k,)).
    """
    pass
# ---- SOLUTION ----
def pca(X, k):
    """PCA via eigendecomposition of the sample covariance matrix.

    Args:
        X: data matrix, shape (N, D).
        k: number of principal components to keep.

    Returns:
        Tuple (X_projected, components, explained_var_ratio) with shapes
        (N, k), (D, k), and (k,).
    """
    # Center each feature at zero mean.
    centered = X - X.mean(axis=0)
    # Sample covariance: (D, N) @ (N, D) / (N - 1) -> (D, D).
    n_samples = X.shape[0]
    cov = centered.T @ centered / (n_samples - 1)
    # eigh is the right decomposition for a symmetric matrix; it returns
    # eigenvalues in ascending order, so flip everything to descending.
    eigvals, eigvecs = np.linalg.eigh(cov)
    order = np.argsort(eigvals)[::-1]
    eigvals = eigvals[order]
    eigvecs = eigvecs[:, order]
    # Top-k eigenvectors (as columns) are the principal directions.
    components = eigvecs[:, :k]
    # Project the centered data: (N, D) @ (D, k) -> (N, k).
    projected = centered @ components
    # Fraction of total variance captured by each kept component.
    ratio = eigvals[:k] / eigvals.sum()
    return projected, components, ratio
# ---- VERIFICATION ----
# Seed so the synthetic data (and variance ratios) are reproducible.
np.random.seed(42)
# Create data with clear principal directions
N = 500
X = np.random.randn(N, 5)
# Make first two dimensions have much higher variance
X[:, 0] *= 10
X[:, 1] *= 5
X_proj, components, var_ratio = pca(X, k=2)
assert X_proj.shape == (N, 2), f"Expected ({N}, 2), got {X_proj.shape}"
assert components.shape == (5, 2), f"Expected (5, 2), got {components.shape}"
assert var_ratio[0] > var_ratio[1], "First component should explain more variance"
# Scaled dims have variance ~100 and ~25 vs ~1 each for the other three.
assert var_ratio.sum() > 0.8, f"Top 2 components should explain >80% variance, got {var_ratio.sum():.2f}"
print("Challenge 5 passed!")
Challenge 6: SVD for Low-Rank Approximation
Problem: Implement low-rank matrix approximation using SVD. Given a matrix A of shape (M, N) and rank k, compute the best rank-k approximation. This is used in recommendation systems (matrix factorization), image compression, and embedding compression.
import numpy as np
# ---- CHALLENGE ----
def low_rank_approx(A, k):
    """Compute best rank-k approximation of matrix A using SVD.

    A: (M, N) matrix.  k: target rank.
    Returns (A_k, compression_ratio).
    """
    pass
def truncated_svd_embeddings(embeddings, k):
    """Compress embeddings from (N, D) to (N, k) using SVD.

    Returns (reduced (N, k), Vt_k (k, D), mean (D,)); reconstruct with
    reduced @ Vt_k + mean.
    """
    pass
# ---- SOLUTION ----
def low_rank_approx(A, k):
    """Best rank-k approximation of A (Eckart-Young) via truncated SVD.

    Args:
        A: matrix of shape (M, N).
        k: target rank.

    Returns:
        Tuple (A_k, compression_ratio): the (M, N) rank-k reconstruction
        and the storage ratio M*N / (k * (M + N + 1)).
    """
    # Thin SVD: A = U @ diag(S) @ Vt, singular values sorted descending.
    U, S, Vt = np.linalg.svd(A, full_matrices=False)
    # Scaling U's leading k columns by S (broadcast across columns) and
    # multiplying by the leading k rows of Vt reconstructs
    # U_k @ diag(S_k) @ Vt_k without materializing diag(S_k).
    A_k = (U[:, :k] * S[:k]) @ Vt[:k, :]
    # Storage: the full matrix holds M*N numbers; the truncated factors
    # need k*(M + N + 1) — U_k, Vt_k, and the k singular values.
    m, n = A.shape
    compression_ratio = (m * n) / (k * (m + n + 1))
    return A_k, compression_ratio
def truncated_svd_embeddings(embeddings, k):
    """Reduce embedding dimensionality via SVD of the centered matrix.

    Args:
        embeddings: matrix of shape (N, D).
        k: reduced dimensionality.

    Returns:
        Tuple (reduced, Vt_k, mean): the (N, k) codes, the (k, D) row
        basis, and the (D,) feature mean. Reconstruct with
        reduced @ Vt_k + mean.
    """
    # Remove the per-dimension mean so the SVD models variance, not offset.
    mean = embeddings.mean(axis=0)
    U, S, Vt = np.linalg.svd(embeddings - mean, full_matrices=False)
    # U's leading k columns scaled by the singular values are the codes.
    reduced = U[:, :k] * S[:k]
    # Leading k right singular vectors are kept for reconstruction.
    return reduced, Vt[:k, :], mean
# ---- VERIFICATION ----
# Seed so the synthetic matrices are reproducible.
np.random.seed(42)
# Create a rank-3 matrix with noise
A_true = np.random.randn(100, 3) @ np.random.randn(3, 50)
A_noisy = A_true + np.random.randn(100, 50) * 0.1
A_approx, ratio = low_rank_approx(A_noisy, k=3)
# Rank-3 truncation should strip most of the added noise.
error = np.linalg.norm(A_true - A_approx) / np.linalg.norm(A_true)
assert error < 0.15, f"Reconstruction error too high: {error:.4f}"
assert ratio > 1, f"Should achieve compression, got ratio {ratio:.2f}"
# Test embedding compression
embeddings = np.random.randn(1000, 768)  # BERT-like embeddings
reduced, Vt_k, mean = truncated_svd_embeddings(embeddings, k=64)
assert reduced.shape == (1000, 64), f"Expected (1000, 64), got {reduced.shape}"
# Reconstruct and check (random data has no low-rank structure, so the
# bound is loose — this is a sanity check, not an accuracy claim)
reconstructed = reduced @ Vt_k + mean
error = np.linalg.norm(embeddings - reconstructed) / np.linalg.norm(embeddings)
assert error < 1.0, f"Reconstruction error: {error:.4f}"
print("Challenge 6 passed!")
Key Takeaways
- Use the `@` operator for matrix multiplication — it is clearer than `np.dot` for 2D arrays
- Use `np.linalg.solve` instead of explicit `np.linalg.inv` — it is faster and more numerically stable
- Use `eigh` (not `eig`) for symmetric matrices like covariance — it guarantees real eigenvalues and is faster
- `np.argpartition` is O(n) for finding top-k indices vs O(n log n) for `np.argsort`
- SVD is the workhorse of dimensionality reduction: PCA, matrix factorization, and embedding compression all use it
- Always add regularization (ridge) when inverting matrices in ML to avoid numerical instability
Lilly Tech Systems