Matrix Mathematics
Six coding challenges covering the linear algebra operations that underpin all of machine learning: dot products for similarity, matrix multiplication for neural network layers, and decompositions (eigenvalue, SVD) for dimensionality reduction.
Challenge 1: Dot Product for Similarity
Problem: Given a query embedding q of shape (D,) and a matrix of document embeddings docs of shape (N, D), compute the dot product similarity between the query and every document. Return the indices of the top-K most similar documents.
import numpy as np
# ---- CHALLENGE ----
def dot_product_retrieval(q, docs, k):
    """Return indices of top-k documents by dot product similarity with query.

    q: query embedding, shape (D,).
    docs: document embedding matrix, shape (N, D).
    k: number of results to return.
    Returns an array of k document indices, most similar first.
    """
    pass
# ---- SOLUTION ----
def dot_product_retrieval(q, docs, k):
    """Return indices of top-k documents by dot product similarity.

    Args:
        q: query embedding, shape (D,).
        docs: document embedding matrix, shape (N, D).
        k: number of results; clamped to [0, N].

    Returns:
        Integer array of at most k indices, ordered by descending similarity.
    """
    # (N, D) @ (D,) -> (N,) dot product with each doc
    similarities = docs @ q
    n = similarities.shape[0]
    # Guard edge cases the slicing tricks below get wrong: [-0:] returns the
    # WHOLE array for k == 0, and argpartition raises ValueError for k > n.
    k = min(k, n)
    if k <= 0:
        return np.empty(0, dtype=np.intp)
    # np.argpartition is O(n) vs O(n log n) for a full sort.
    # It partitions so that the k largest values sit in the last k positions.
    top_k_idx = np.argpartition(similarities, -k)[-k:]
    # Order the top-k by similarity (descending).
    return top_k_idx[np.argsort(similarities[top_k_idx])[::-1]]
# ---- VERIFICATION ----
# Seed so the random embeddings (and thus the test) are reproducible.
# NOTE: the randn call order below determines the values — do not reorder.
np.random.seed(42)
q = np.random.randn(128)            # query embedding, shape (128,)
docs = np.random.randn(10000, 128)  # 10k document embeddings
# Make doc 42 very similar to query: a scaled copy plus tiny noise.
docs[42] = q * 2 + np.random.randn(128) * 0.01
top_5 = dot_product_retrieval(q, docs, 5)
assert 42 in top_5, "Doc 42 should be in top 5"
assert top_5[0] == 42, "Doc 42 should be the most similar"
print("Challenge 1 passed!")
Challenge 2: Neural Network Forward Pass
Problem: Implement a two-layer neural network forward pass using only NumPy matrix operations. Given input X of shape (N, D), weights W1 of shape (D, H), bias b1 of shape (H,), weights W2 of shape (H, C), and bias b2 of shape (C,), compute the output logits. Use ReLU activation between layers.
import numpy as np
# ---- CHALLENGE ----
def forward_pass(X, W1, b1, W2, b2):
    """Two-layer neural network: X -> Linear -> ReLU -> Linear -> logits.

    X: (N, D) input batch.  W1: (D, H), b1: (H,).  W2: (H, C), b2: (C,).
    Returns logits of shape (N, C).
    """
    pass
# ---- SOLUTION ----
def forward_pass(X, W1, b1, W2, b2):
    """Two-layer neural network forward pass.

    Computes X -> Linear(W1, b1) -> ReLU -> Linear(W2, b2).

    Args:
        X: input batch, shape (N, D).
        W1, b1: first layer weights (D, H) and bias (H,).
        W2, b2: second layer weights (H, C) and bias (C,).

    Returns:
        Logits of shape (N, C).
    """
    # Hidden pre-activation: (N, D) @ (D, H) + (H,) broadcasts to (N, H).
    hidden = X @ W1 + b1
    # ReLU: clamp negatives to zero, element-wise.
    hidden = np.maximum(0, hidden)
    # Output layer: (N, H) @ (H, C) + (C,) -> (N, C).
    return hidden @ W2 + b2
# ---- VERIFICATION ----
# MNIST-like dimensions: batch, input, hidden, classes.
N, D, H, C = 32, 784, 256, 10  # batch, input, hidden, classes
X = np.random.randn(N, D)
# Small init scale keeps pre-activations from blowing up.
W1 = np.random.randn(D, H) * 0.01
b1 = np.zeros(H)
W2 = np.random.randn(H, C) * 0.01
b2 = np.zeros(C)
logits = forward_pass(X, W1, b1, W2, b2)
# Only the output shape is checked — inputs are unseeded random.
assert logits.shape == (N, C), f"Expected ({N}, {C}), got {logits.shape}"
print("Challenge 2 passed!")
Challenge 3: Linear Regression Closed-Form
Problem: Implement the closed-form solution for linear regression using the normal equation: w = (X^T X)^{-1} X^T y. Handle the case where X^T X is singular by adding regularization (ridge regression): w = (X^T X + lambda * I)^{-1} X^T y.
import numpy as np
# ---- CHALLENGE ----
def linear_regression_closed_form(X, y, reg_lambda=0.0):
    """Solve linear regression using normal equation with optional L2 regularization.

    X: (N, D) design matrix.  y: (N,) targets.
    reg_lambda: ridge strength; 0.0 means plain least squares.
    Returns the (D,) weight vector.
    """
    pass
# ---- SOLUTION ----
def linear_regression_closed_form(X, y, reg_lambda=0.0):
    """Normal equation: w = (X^T X + lambda*I)^{-1} X^T y.

    Args:
        X: design matrix, shape (N, D).
        y: targets, shape (N,).
        reg_lambda: L2 (ridge) regularization strength; 0 disables it.

    Returns:
        Weight vector w of shape (D,).
    """
    D = X.shape[1]
    # X^T X: (D, N) @ (N, D) -> (D, D)
    XtX = X.T @ X
    if reg_lambda > 0:
        # Out-of-place add (not +=): with an integer design matrix, XtX is
        # integer-typed and in-place addition of a float matrix raises a
        # casting error (numpy 'same_kind' rule). Out-of-place upcasts safely.
        XtX = XtX + reg_lambda * np.eye(D)
    # X^T y: (D, N) @ (N,) -> (D,)
    Xty = X.T @ y
    # Solve the system (faster and more numerically stable than explicit inverse).
    return np.linalg.solve(XtX, Xty)
# ---- VERIFICATION ----
# Seed so the generated regression problem is reproducible.
np.random.seed(42)
N, D = 100, 5
X = np.random.randn(N, D)
# Ground-truth weights the solver should recover.
w_true = np.array([1.0, -2.0, 3.0, -4.0, 5.0])
y = X @ w_true + np.random.randn(N) * 0.1  # small noise
w_hat = linear_regression_closed_form(X, y, reg_lambda=0.01)
assert np.allclose(w_hat, w_true, atol=0.1), f"Weights should be close to true: {w_hat}"
# Test with predictions: in-sample MSE should be near the noise floor (0.1^2).
y_pred = X @ w_hat
mse = np.mean((y - y_pred) ** 2)
assert mse < 0.05, f"MSE too high: {mse}"
print("Challenge 3 passed!")
Challenge 4: Determinant and Matrix Properties
Problem: Given a covariance matrix, compute its determinant to check if the Gaussian distribution is well-defined. Implement a function that checks if a matrix is positive semi-definite (all eigenvalues >= 0), which is required for valid covariance matrices. Also compute the condition number to assess numerical stability.
import numpy as np
# ---- CHALLENGE ----
def analyze_covariance(cov_matrix):
    """Analyze a covariance matrix: determinant, PSD check, condition number.

    cov_matrix: symmetric (D, D) matrix.  Returns a dict of diagnostics.
    """
    pass
# ---- SOLUTION ----
def analyze_covariance(cov_matrix):
    """Analyze a covariance matrix for validity and numerical properties.

    Args:
        cov_matrix: symmetric (D, D) covariance matrix.

    Returns:
        dict with keys: "determinant", "eigenvalues" (ascending),
        "is_psd", "condition_number", "is_well_conditioned", "log_det".
    """
    det = np.linalg.det(cov_matrix)
    # eigvalsh exploits symmetry: guaranteed-real eigenvalues, faster than eigvals.
    eigenvalues = np.linalg.eigvalsh(cov_matrix)
    # Small negative tolerance absorbs floating-point round-off.
    is_psd = np.all(eigenvalues >= -1e-10)
    condition_number = np.linalg.cond(cov_matrix)
    is_well_conditioned = condition_number < 1e10
    # slogdet computes log|det| without forming det itself, so it stays finite
    # in high dimensions where det under/overflows and np.log(det) would
    # spuriously return -inf/inf (e.g. det = 0.01^300 underflows to 0.0).
    sign, logabsdet = np.linalg.slogdet(cov_matrix)
    return {
        "determinant": det,
        "eigenvalues": eigenvalues,
        "is_psd": is_psd,
        "condition_number": condition_number,
        "is_well_conditioned": is_well_conditioned,
        "log_det": logabsdet if sign > 0 else -np.inf,
    }
# ---- VERIFICATION ----
# Valid covariance matrix (from data) — NOTE: unseeded, but any sample
# covariance of full-rank random data is PSD with positive determinant.
data = np.random.randn(100, 3)
cov = np.cov(data.T)  # (3, 3) sample covariance
result = analyze_covariance(cov)
assert result["is_psd"], "Covariance from data should be PSD"
assert result["determinant"] > 0, "Valid covariance should have positive determinant"
# Singular matrix (rank-deficient)
singular = np.array([[1, 2], [2, 4]])  # row 2 = 2 * row 1
result2 = analyze_covariance(singular)
assert abs(result2["determinant"]) < 1e-10, "Singular matrix det should be ~0"
print("Challenge 4 passed!")
Challenge 5: PCA via Eigenvalue Decomposition
Problem: Implement Principal Component Analysis from scratch using eigenvalue decomposition. Given data X of shape (N, D), reduce it to k dimensions. Return the projected data, the principal components, and the explained variance ratio.
import numpy as np
# ---- CHALLENGE ----
def pca(X, k):
    """PCA: reduce X from (N, D) to (N, k). Return projected data,
    principal components, and explained variance ratio.

    X: (N, D) data matrix.  k: number of components to keep.
    Returns (X_projected (N, k), components (D, k), explained_var_ratio (k,)).
    """
    pass
# ---- SOLUTION ----
def pca(X, k):
    """PCA via eigendecomposition of the sample covariance matrix.

    Args:
        X: data matrix, shape (N, D).
        k: number of principal components to keep.

    Returns:
        Tuple (X_projected, components, explained_var_ratio) with shapes
        (N, k), (D, k), and (k,).
    """
    # Center each feature at zero mean.
    centered = X - X.mean(axis=0)
    # Sample covariance: (D, N) @ (N, D) / (N - 1) -> (D, D).
    n_samples = X.shape[0]
    cov = centered.T @ centered / (n_samples - 1)
    # eigh is the right decomposition for a symmetric matrix; it returns
    # eigenvalues in ascending order, so flip everything to descending.
    eigvals, eigvecs = np.linalg.eigh(cov)
    order = np.argsort(eigvals)[::-1]
    eigvals = eigvals[order]
    eigvecs = eigvecs[:, order]
    # Top-k eigenvectors (as columns) are the principal directions.
    components = eigvecs[:, :k]
    # Project the centered data: (N, D) @ (D, k) -> (N, k).
    projected = centered @ components
    # Fraction of total variance captured by each kept component.
    ratio = eigvals[:k] / eigvals.sum()
    return projected, components, ratio
# ---- VERIFICATION ----
# Seed so the synthetic data (and variance ratios) are reproducible.
np.random.seed(42)
# Create data with clear principal directions
N = 500
X = np.random.randn(N, 5)
# Make first two dimensions have much higher variance
X[:, 0] *= 10
X[:, 1] *= 5
X_proj, components, var_ratio = pca(X, k=2)
assert X_proj.shape == (N, 2), f"Expected ({N}, 2), got {X_proj.shape}"
assert components.shape == (5, 2), f"Expected (5, 2), got {components.shape}"
assert var_ratio[0] > var_ratio[1], "First component should explain more variance"
# Scaled dims have variance ~100 and ~25 vs ~1 each for the other three.
assert var_ratio.sum() > 0.8, f"Top 2 components should explain >80% variance, got {var_ratio.sum():.2f}"
print("Challenge 5 passed!")
Challenge 6: SVD for Low-Rank Approximation
Problem: Implement low-rank matrix approximation using SVD. Given a matrix A of shape (M, N) and rank k, compute the best rank-k approximation. This is used in recommendation systems (matrix factorization), image compression, and embedding compression.
import numpy as np
# ---- CHALLENGE ----
def low_rank_approx(A, k):
    """Compute best rank-k approximation of matrix A using SVD.

    A: (M, N) matrix.  k: target rank.
    Returns (A_k, compression_ratio).
    """
    pass
def truncated_svd_embeddings(embeddings, k):
    """Compress embeddings from (N, D) to (N, k) using SVD.

    Returns (reduced (N, k), Vt_k (k, D), mean (D,)); reconstruct with
    reduced @ Vt_k + mean.
    """
    pass
# ---- SOLUTION ----
def low_rank_approx(A, k):
    """Best rank-k approximation of A (Eckart-Young) via truncated SVD.

    Args:
        A: matrix of shape (M, N).
        k: target rank.

    Returns:
        Tuple (A_k, compression_ratio): the (M, N) rank-k reconstruction
        and the storage ratio M*N / (k * (M + N + 1)).
    """
    # Thin SVD: A = U @ diag(S) @ Vt, singular values sorted descending.
    U, S, Vt = np.linalg.svd(A, full_matrices=False)
    # Scaling U's leading k columns by S (broadcast across columns) and
    # multiplying by the leading k rows of Vt reconstructs
    # U_k @ diag(S_k) @ Vt_k without materializing diag(S_k).
    A_k = (U[:, :k] * S[:k]) @ Vt[:k, :]
    # Storage: the full matrix holds M*N numbers; the truncated factors
    # need k*(M + N + 1) — U_k, Vt_k, and the k singular values.
    m, n = A.shape
    compression_ratio = (m * n) / (k * (m + n + 1))
    return A_k, compression_ratio
def truncated_svd_embeddings(embeddings, k):
    """Reduce embedding dimensionality via SVD of the centered matrix.

    Args:
        embeddings: matrix of shape (N, D).
        k: reduced dimensionality.

    Returns:
        Tuple (reduced, Vt_k, mean): the (N, k) codes, the (k, D) row
        basis, and the (D,) feature mean. Reconstruct with
        reduced @ Vt_k + mean.
    """
    # Remove the per-dimension mean so the SVD models variance, not offset.
    mean = embeddings.mean(axis=0)
    U, S, Vt = np.linalg.svd(embeddings - mean, full_matrices=False)
    # U's leading k columns scaled by the singular values are the codes.
    reduced = U[:, :k] * S[:k]
    # Leading k right singular vectors are kept for reconstruction.
    return reduced, Vt[:k, :], mean
# ---- VERIFICATION ----
# Seed so the synthetic matrices are reproducible.
np.random.seed(42)
# Create a rank-3 matrix with noise
A_true = np.random.randn(100, 3) @ np.random.randn(3, 50)
A_noisy = A_true + np.random.randn(100, 50) * 0.1
A_approx, ratio = low_rank_approx(A_noisy, k=3)
# Rank-3 truncation should strip most of the added noise.
error = np.linalg.norm(A_true - A_approx) / np.linalg.norm(A_true)
assert error < 0.15, f"Reconstruction error too high: {error:.4f}"
assert ratio > 1, f"Should achieve compression, got ratio {ratio:.2f}"
# Test embedding compression
embeddings = np.random.randn(1000, 768)  # BERT-like embeddings
reduced, Vt_k, mean = truncated_svd_embeddings(embeddings, k=64)
assert reduced.shape == (1000, 64), f"Expected (1000, 64), got {reduced.shape}"
# Reconstruct and check (random data has no low-rank structure, so the
# bound is loose — this is a sanity check, not an accuracy claim)
reconstructed = reduced @ Vt_k + mean
error = np.linalg.norm(embeddings - reconstructed) / np.linalg.norm(embeddings)
assert error < 1.0, f"Reconstruction error: {error:.4f}"
print("Challenge 6 passed!")
Key Takeaways
- Use the `@` operator for matrix multiplication — it is clearer than `np.dot` for 2D arrays
- Use `np.linalg.solve` instead of explicit `np.linalg.inv` — it is faster and more numerically stable
- Use `eigh` (not `eig`) for symmetric matrices like covariance — it guarantees real eigenvalues and is faster
- `np.argpartition` is O(n) for finding top-k indices vs O(n log n) for `np.argsort`
- SVD is the workhorse of dimensionality reduction: PCA, matrix factorization, and embedding compression all use it
- Always add regularization (ridge) when inverting matrices in ML to avoid numerical instability
Lilly Tech Systems