Intermediate
Regression
Predict continuous outcomes using linear regression, regularized models, decision trees, random forests, and gradient boosting.
Linear Regression
R
# Linear regression of mpg on weight, horsepower and cylinder count (mtcars).
library(tidymodels)

# Base R model ----
model <- lm(mpg ~ wt + hp + cyl, data = mtcars)
summary(model)

# With tidymodels (parsnip) ----
# A model specification is declared first and only fitted in a second step.
lm_spec <- set_engine(linear_reg(), "lm")
lm_fit <- fit(lm_spec, mpg ~ wt + hp + cyl, data = mtcars)

tidy(lm_fit)    # Coefficients as tibble
glance(lm_fit)  # Model summary statistics
Ridge and Lasso Regression
R
# Regularized linear regression with the glmnet engine.
# In all three specs `penalty = 0.1` is the regularization amount; `mixture`
# selects the kind of penalty.

# Ridge regression: pure L2 penalty (mixture = 0)
ridge_spec <- linear_reg(penalty = 0.1, mixture = 0) |>
  set_engine("glmnet")

# Lasso regression: pure L1 penalty (mixture = 1)
lasso_spec <- linear_reg(penalty = 0.1, mixture = 1) |>
  set_engine("glmnet")

# Elastic net: blend of L1 and L2 (mixture between 0 and 1)
enet_spec <- linear_reg(penalty = 0.1, mixture = 0.5) |>
  set_engine("glmnet")

# Fit with a workflow: the recipe normalizes every numeric predictor before
# the lasso model is estimated.
rec <- recipe(mpg ~ ., data = mtcars) |>
  step_normalize(all_numeric_predictors())

wf <- workflow() |>
  add_recipe(rec) |>
  add_model(lasso_spec)

# Named `lasso_fit` rather than `fit` so the result does not shadow the
# `fit()` generic and follows the `<model>_fit` naming of the other examples.
lasso_fit <- wf |>
  fit(data = mtcars)
Decision Tree Regression
R
# Regression tree on mtcars using the rpart engine.
tree_spec <- decision_tree(tree_depth = 5, min_n = 10) |>
  set_engine("rpart") |>
  set_mode("regression")

tree_fit <- tree_spec |>
  fit(mpg ~ ., data = mtcars)

# Visualize the tree
library(rpart.plot)

# extract_fit_engine() is the supported accessor for the underlying rpart
# object (instead of reaching into `tree_fit$fit`). `roundint = FALSE` avoids
# the rpart.plot warning raised when it cannot retrieve the training data from
# a model that was fitted through parsnip.
tree_fit |>
  extract_fit_engine() |>
  rpart.plot(roundint = FALSE)
Random Forest Regression
R
library(vip)

# Random forest for regression: 500 trees, 3 candidate predictors per split,
# minimum node size 5. `importance = "impurity"` is forwarded to ranger so
# that variable importance scores are stored with the fitted model.
rf_spec <- rand_forest(
  mode = "regression",
  mtry = 3,
  trees = 500,
  min_n = 5
) |>
  set_engine("ranger", importance = "impurity")

rf_fit <- fit(rf_spec, mpg ~ ., data = mtcars)

# Feature importance
vip(rf_fit)
XGBoost Regression
R
# Gradient-boosted trees for regression with the xgboost engine:
# 500 trees of depth 6, learning rate 0.01, loss reduction 0.1.
xgb_spec <- boost_tree(
  mode = "regression",
  trees = 500,
  tree_depth = 6,
  learn_rate = 0.01,
  loss_reduction = 0.1
) |>
  set_engine("xgboost")

xgb_fit <- fit(xgb_spec, mpg ~ ., data = mtcars)
Regression Metrics
| Metric | Function | Interpretation |
|---|---|---|
| RMSE | rmse() | Root Mean Squared Error (lower is better) |
| MAE | mae() | Mean Absolute Error (lower is better) |
| R-squared | rsq() | Proportion of variance explained (closer to 1 is better) |
R
# Evaluate predictions on held-out data.
# Hold out 25% of mtcars as a test set and refit the random forest on the
# remaining rows, so the metrics below are computed on data the model has not
# seen. The seed makes the split (and the forest) reproducible.
set.seed(123)
mtcars_split <- initial_split(mtcars, prop = 0.75)
train <- training(mtcars_split)
test <- testing(mtcars_split)

rf_fit <- rf_spec |>
  fit(mpg ~ ., data = train)

# predict() returns a tibble with a `.pred` column; bind it to the test rows
# so truth and estimate sit side by side.
predictions <- rf_fit |>
  predict(test)
results <- test |>
  bind_cols(predictions)

rmse(results, truth = mpg, estimate = .pred)
mae(results, truth = mpg, estimate = .pred)
rsq(results, truth = mpg, estimate = .pred)
Lilly Tech Systems