Intermediate
Online Feature Store
Materialize features to Redis for low-latency (single-digit-millisecond) lookups during model inference.
Materialization
Materialization copies the latest feature values from the offline store into Redis so they can be served with low latency:
# Materialize features to Redis.
# The end timestamp is generated in UTC (-u) so it does not depend on the
# machine's local timezone; quoting keeps it a single argument.
feast materialize-incremental "$(date -u +%Y-%m-%dT%H:%M:%S)"
# Or from Python:
from datetime import datetime, timedelta, timezone

from feast import FeatureStore

# Open the feature store described by the "feature_repo" directory.
store = FeatureStore(repo_path="feature_repo")

# Pass a timezone-aware UTC timestamp: a naive local datetime.now() carries
# no zone information, so the materialization window could be shifted by the
# machine's UTC offset.
store.materialize_incremental(end_date=datetime.now(timezone.utc))
Online Feature Retrieval
# src/online.py
from feast import FeatureStore
import time
# Shared FeatureStore handle, created once at import time from the
# "feature_repo" path and used by get_online_features() below.
store = FeatureStore(repo_path="feature_repo")
def get_online_features(driver_ids):
    """Fetch the latest driver_stats features from the online store.

    Builds one ``{"driver_id": ...}`` entity row per id, requests the
    ``conv_rate``, ``acc_rate`` and ``avg_daily_trips`` feature references
    through the module-level ``store`` (Redis in this tutorial), and prints
    how long the lookup took.

    Args:
        driver_ids: Iterable of driver ids to look up. Any iterable works,
            including generators, because the entity count is taken from
            the rows that were built rather than from ``len(driver_ids)``.

    Returns:
        The ``to_dict()`` form of the online response returned by
        ``store.get_online_features``.
    """
    entity_rows = [{"driver_id": driver_id} for driver_id in driver_ids]

    # perf_counter() is monotonic and high-resolution, so the measurement
    # cannot be skewed by wall-clock adjustments the way time.time() can.
    start = time.perf_counter()
    features = store.get_online_features(
        features=[
            "driver_stats:conv_rate",
            "driver_stats:acc_rate",
            "driver_stats:avg_daily_trips",
        ],
        entity_rows=entity_rows,
    ).to_dict()
    latency = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    print(f"Retrieved features for {len(entity_rows)} entities "
          f"in {latency:.1f}ms")
    return features
def benchmark(n_requests=100):
    """Benchmark online feature retrieval latency.

    Issues ``n_requests`` sequential lookups for driver ids ``[1, 2, 3]``
    via ``get_online_features`` and prints the p50, p95 and p99 latency in
    milliseconds.

    Args:
        n_requests: Number of lookups to time. If it is zero or negative,
            no requests are made and no percentiles are printed.
    """
    # Imported up front so a missing numpy fails before any requests are sent.
    import numpy as np

    latencies = []
    for _ in range(n_requests):
        # perf_counter() is monotonic and high-resolution, which suits
        # millisecond-scale intervals better than the time.time() wall clock.
        start = time.perf_counter()
        # NOTE: get_online_features() prints a line per call, so that console
        # output is included in the measured time.
        get_online_features([1, 2, 3])
        latencies.append((time.perf_counter() - start) * 1000)

    if not latencies:
        # np.percentile has nothing to summarise for an empty sample.
        print("No requests issued; nothing to report.")
        return

    for pct in (50, 95, 99):
        print(f"p{pct}: {np.percentile(latencies, pct):.1f}ms")
if __name__ == "__main__":
    # Run one batched lookup for five drivers and show the raw result,
    # then report latency percentiles.
    demo_driver_ids = [1, 2, 3, 4, 5]
    features = get_online_features(demo_driver_ids)
    print(features)

    print("\nBenchmark:")
    benchmark()
Redis performance: Typical latency is 1-5ms for single entity lookups and 5-15ms for batch lookups of 10+ entities. This is fast enough for real-time ML inference in production.