Intermediate

BFS & DFS Graph Problems

Breadth-first search and depth-first search are the two fundamental graph traversal strategies. BFS explores level by level (shortest path in unweighted graphs), while DFS explores as deep as possible (cycle detection, topological sort). In ML, BFS drives beam search in sequence models, while DFS powers backtracking in constraint satisfaction and hyperparameter search.

Problem 1: Number of Islands

🎯
Problem: Given a 2D grid of '1's (land) and '0's (water), count the number of islands. An island is surrounded by water and formed by connecting adjacent lands horizontally or vertically.
ML Context: This is connected component labeling — the same algorithm used in image segmentation to identify distinct objects. In computer vision, each "island" is a detected region (e.g., a tumor in a medical scan).
from collections import deque

def num_islands(grid: list) -> int:
    """Count islands using BFS flood fill.
    Time: O(m * n), Space: O(min(m, n)) for BFS queue.
    """
    if not grid:
        return 0

    rows, cols = len(grid), len(grid[0])
    count = 0

    def bfs(r, c):
        queue = deque([(r, c)])
        grid[r][c] = '0'  # Mark visited
        while queue:
            row, col = queue.popleft()
            for dr, dc in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
                nr, nc = row + dr, col + dc
                if 0 <= nr < rows and 0 <= nc < cols and grid[nr][nc] == '1':
                    grid[nr][nc] = '0'
                    queue.append((nr, nc))

    for r in range(rows):
        for c in range(cols):
            if grid[r][c] == '1':
                bfs(r, c)
                count += 1

    return count

# DFS version (often simpler for grid problems)
def num_islands_dfs(grid: list) -> int:
    """Count islands using DFS.
    Time: O(m * n), Space: O(m * n) worst case for call stack.
    """
    if not grid:
        return 0

    rows, cols = len(grid), len(grid[0])
    count = 0

    def dfs(r, c):
        if r < 0 or r >= rows or c < 0 or c >= cols or grid[r][c] == '0':
            return
        grid[r][c] = '0'  # Mark visited
        dfs(r + 1, c)
        dfs(r - 1, c)
        dfs(r, c + 1)
        dfs(r, c - 1)

    for r in range(rows):
        for c in range(cols):
            if grid[r][c] == '1':
                dfs(r, c)
                count += 1

    return count

# Test
grid = [
    ['1','1','0','0','0'],
    ['1','1','0','0','0'],
    ['0','0','1','0','0'],
    ['0','0','0','1','1']
]
print(num_islands(grid))  # 3

Problem 2: Clone Graph

🎯
Problem: Given a reference to a node in a connected undirected graph, return a deep copy of the graph. Each node contains a value and a list of neighbors.
ML Context: Deep copying computation graphs is essential when implementing gradient checkpointing, model parallelism, or when you need to fork a model's computation graph for different training branches (e.g., multi-task learning with shared backbones).
class GraphNode:
    def __init__(self, val=0, neighbors=None):
        self.val = val
        self.neighbors = neighbors if neighbors else []

def clone_graph(node: GraphNode) -> GraphNode:
    """Clone graph using BFS with a hash map for visited tracking.
    Time: O(V + E), Space: O(V).
    """
    if not node:
        return None

    cloned = {node: GraphNode(node.val)}
    queue = deque([node])

    while queue:
        current = queue.popleft()
        for neighbor in current.neighbors:
            if neighbor not in cloned:
                cloned[neighbor] = GraphNode(neighbor.val)
                queue.append(neighbor)
            cloned[current].neighbors.append(cloned[neighbor])

    return cloned[node]

def clone_graph_dfs(node: GraphNode) -> GraphNode:
    """Clone graph using DFS with hash map.
    Time: O(V + E), Space: O(V).
    """
    cloned = {}

    def dfs(n):
        if n in cloned:
            return cloned[n]
        copy = GraphNode(n.val)
        cloned[n] = copy
        for neighbor in n.neighbors:
            copy.neighbors.append(dfs(neighbor))
        return copy

    return dfs(node) if node else None

# Test
n1, n2, n3 = GraphNode(1), GraphNode(2), GraphNode(3)
n1.neighbors = [n2, n3]
n2.neighbors = [n1, n3]
n3.neighbors = [n1, n2]
cloned = clone_graph(n1)
print(cloned.val, [n.val for n in cloned.neighbors])  # 1 [2, 3]

Problem 3: Course Schedule (Cycle Detection)

🎯
Problem: There are numCourses courses labeled 0 to numCourses-1. Given a list of prerequisites [a, b] meaning "b must be taken before a", determine if it is possible to finish all courses (i.e., no circular dependencies).
ML Context: ML pipelines are DAGs. If pipeline A depends on B and B depends on A, you have a deadlock. This problem detects circular dependencies in Airflow DAGs, Kubeflow pipelines, and feature dependency chains.
def can_finish(num_courses: int, prerequisites: list) -> bool:
    """Detect cycle in directed graph using DFS coloring.
    WHITE (0) = unvisited, GRAY (1) = in current path, BLACK (2) = done.
    A cycle exists if we encounter a GRAY node during DFS.
    Time: O(V + E), Space: O(V + E).
    """
    from collections import defaultdict

    graph = defaultdict(list)
    for course, prereq in prerequisites:
        graph[prereq].append(course)

    WHITE, GRAY, BLACK = 0, 1, 2
    color = [WHITE] * num_courses

    def has_cycle(node):
        color[node] = GRAY
        for neighbor in graph[node]:
            if color[neighbor] == GRAY:
                return True   # Back edge = cycle
            if color[neighbor] == WHITE and has_cycle(neighbor):
                return True
        color[node] = BLACK
        return False

    for course in range(num_courses):
        if color[course] == WHITE:
            if has_cycle(course):
                return False

    return True

def can_finish_bfs(num_courses: int, prerequisites: list) -> bool:
    """Kahn's algorithm (BFS topological sort) for cycle detection.
    If we process fewer than numCourses nodes, a cycle exists.
    Time: O(V + E), Space: O(V + E).
    """
    from collections import defaultdict

    graph = defaultdict(list)
    in_degree = [0] * num_courses

    for course, prereq in prerequisites:
        graph[prereq].append(course)
        in_degree[course] += 1

    queue = deque([i for i in range(num_courses) if in_degree[i] == 0])
    processed = 0

    while queue:
        node = queue.popleft()
        processed += 1
        for neighbor in graph[node]:
            in_degree[neighbor] -= 1
            if in_degree[neighbor] == 0:
                queue.append(neighbor)

    return processed == num_courses

# Test
print(can_finish(4, [[1,0],[2,1],[3,2]]))    # True (0->1->2->3)
print(can_finish(2, [[0,1],[1,0]]))           # False (cycle: 0<->1)

Problem 4: Word Ladder

🎯
Problem: Given two words (beginWord and endWord) and a word list, find the length of the shortest transformation sequence where each step changes exactly one letter and each intermediate word must be in the word list.
ML Context: Word ladder is a graph shortest-path problem on an implicit graph. In NLP, similar graphs model word edit distances for spelling correction, and the same BFS approach finds the minimum number of edits to transform one word to another.
def word_ladder(begin_word: str, end_word: str, word_list: list) -> int:
    """BFS shortest path on implicit word graph.
    Each word connects to words differing by 1 character.
    Time: O(M^2 * N) where M = word length, N = word list size.
    Space: O(M^2 * N) for pattern mapping.
    """
    word_set = set(word_list)
    if end_word not in word_set:
        return 0

    # Build pattern map: h*t -> [hot, hat, hit, ...]
    from collections import defaultdict
    patterns = defaultdict(list)
    for word in word_set:
        for i in range(len(word)):
            pattern = word[:i] + '*' + word[i+1:]
            patterns[pattern].append(word)

    queue = deque([(begin_word, 1)])
    visited = {begin_word}

    while queue:
        word, length = queue.popleft()
        for i in range(len(word)):
            pattern = word[:i] + '*' + word[i+1:]
            for neighbor in patterns[pattern]:
                if neighbor == end_word:
                    return length + 1
                if neighbor not in visited:
                    visited.add(neighbor)
                    queue.append((neighbor, length + 1))

    return 0

# Test
print(word_ladder("hit", "cog", ["hot","dot","dog","lot","log","cog"]))  # 5
# hit -> hot -> dot -> dog -> cog

Problem 5: Shortest Path in Unweighted Graph

🎯
Problem: Given an unweighted undirected graph with n nodes and a list of edges, find the shortest path from source to target. Return the path or -1 if no path exists.
ML Context: Finding shortest paths in knowledge graphs helps determine semantic relatedness between entities. In recommendation systems, the shortest path between a user and an item through the interaction graph can serve as a feature for ranking models.
def shortest_path(n: int, edges: list, source: int, target: int) -> list:
    """BFS shortest path with path reconstruction.
    Time: O(V + E), Space: O(V + E).
    """
    from collections import defaultdict

    graph = defaultdict(list)
    for u, v in edges:
        graph[u].append(v)
        graph[v].append(u)

    queue = deque([(source, [source])])
    visited = {source}

    while queue:
        node, path = queue.popleft()
        if node == target:
            return path
        for neighbor in graph[node]:
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append((neighbor, path + [neighbor]))

    return []  # No path found

# Optimized: track parents instead of carrying full paths
def shortest_path_optimized(n: int, edges: list, source: int, target: int) -> list:
    """BFS with parent tracking for memory-efficient path reconstruction.
    Time: O(V + E), Space: O(V).
    """
    from collections import defaultdict

    graph = defaultdict(list)
    for u, v in edges:
        graph[u].append(v)
        graph[v].append(u)

    parent = {source: None}
    queue = deque([source])

    while queue:
        node = queue.popleft()
        if node == target:
            # Reconstruct path
            path = []
            while node is not None:
                path.append(node)
                node = parent[node]
            return path[::-1]
        for neighbor in graph[node]:
            if neighbor not in parent:
                parent[neighbor] = node
                queue.append(neighbor)

    return []

# Test
edges = [(0,1), (0,2), (1,3), (2,3), (3,4)]
print(shortest_path(5, edges, 0, 4))  # [0, 1, 3, 4] or [0, 2, 3, 4]

Problem 6: Connected Components

🎯
Problem: Given n nodes and a list of undirected edges, find the number of connected components.
ML Context: Connected components are used in community detection for social network analysis, clustering users by interaction patterns, and identifying disconnected subgraphs in knowledge graphs. Label propagation algorithms for semi-supervised learning also operate on connected components.
def count_components(n: int, edges: list) -> int:
    """Count connected components using BFS.
    Time: O(V + E), Space: O(V + E).
    """
    from collections import defaultdict

    graph = defaultdict(list)
    for u, v in edges:
        graph[u].append(v)
        graph[v].append(u)

    visited = set()
    components = 0

    for node in range(n):
        if node not in visited:
            # BFS to mark all nodes in this component
            queue = deque([node])
            visited.add(node)
            while queue:
                current = queue.popleft()
                for neighbor in graph[current]:
                    if neighbor not in visited:
                        visited.add(neighbor)
                        queue.append(neighbor)
            components += 1

    return components

def count_components_union_find(n: int, edges: list) -> int:
    """Count connected components using Union-Find.
    Time: O(E * alpha(V)) ~ O(E), Space: O(V).
    """
    parent = list(range(n))
    rank = [0] * n

    def find(x):
        while parent[x] != x:
            parent[x] = parent[parent[x]]  # Path compression
            x = parent[x]
        return x

    def union(x, y):
        px, py = find(x), find(y)
        if px == py:
            return False
        if rank[px] < rank[py]:
            px, py = py, px
        parent[py] = px
        if rank[px] == rank[py]:
            rank[px] += 1
        return True

    components = n
    for u, v in edges:
        if union(u, v):
            components -= 1

    return components

# Test
print(count_components(5, [(0,1), (1,2), (3,4)]))           # 2
print(count_components_union_find(5, [(0,1), (1,2), (3,4)])) # 2

BFS vs DFS Decision Guide

ScenarioUse BFSUse DFS
Shortest path (unweighted)Yes (guaranteed)No
Cycle detectionYesYes (often simpler)
Topological sortKahn's algorithmReverse postorder
Connected componentsYesYes
Exhaustive searchMemory-intensiveBetter (backtracking)
Level-by-level processingNatural fitPossible but awkward
💡
Rule of thumb: Use BFS when you need shortest path or level-order processing. Use DFS when you need to explore all paths, detect cycles, or the problem involves backtracking. When either works, DFS is usually simpler to code recursively.