Prognostic Models (Survival Analysis)
This track is dedicated to survival prediction tasks.
1. Initialization
First, initialize the prognostic modeling system.
initialize_modeling_system_pro()
#> Prognosis modeling system initialized and default models registered.
2. Training Single Models with models_pro
The models_pro function trains one or more standard survival models.
For this demonstration, we’ll run a subset.
# Run a subset of available prognostic models
results_all_pro <- models_pro(train_pro, model = c("lasso_pro", "rsf_pro"))
#> Running model: lasso_pro
#> Running model: rsf_pro
# Print summary for Random Survival Forest
print_model_summary_pro("rsf_pro", results_all_pro$rsf_pro)
#>
#> --- rsf_pro Prognosis Model (on Training Data) Metrics ---
#> C-index: 0.8702
#> Time-dependent AUROC (years 1, 3, 5): 0.8041, 0.8170, 0.7559
#> Average Time-dependent AUROC: 0.7923
#> KM Group HR (High vs Low): 15.1349 (p-value: 1.509e-16, Cutoff: -1132.7747)
#> --------------------------------------------------
3. Ensemble Modeling
Bagging (bagging_pro)
Builds a Bagging ensemble for survival models.
# Create a Bagging ensemble with lasso as the base survival model
# n_estimators is reduced for faster execution.
bagging_lasso_pro_results <- bagging_pro(train_pro, base_model_name = "lasso_pro", n_estimators = 5, seed = 123)
#> Running Bagging model: Bagging_pro (base: lasso_pro)
print_model_summary_pro("Bagging (LASSO)", bagging_lasso_pro_results)
#>
#> --- Bagging (LASSO) Prognosis Model (on Training Data) Metrics ---
#> Ensemble Type: Bagging (Base: lasso_pro, Estimators: 5)
#> C-index: 0.7330
#> Time-dependent AUROC (years 1, 3, 5): 0.5318, 0.6148, 0.6028
#> Average Time-dependent AUROC: 0.5831
#> KM Group HR (High vs Low): 3.2048 (p-value: 2.22e-08, Cutoff: 0.3661)
#> --------------------------------------------------
Stacking (stacking_pro)
Builds a Stacking ensemble for survival models.
# Create a Stacking ensemble with lasso as the meta-model
stacking_lasso_pro_results <- stacking_pro(
results_all_models = results_all_pro,
data = train_pro,
meta_model_name = "lasso_pro"
)
#> Running Stacking model: Stacking_pro (meta: lasso_pro)
print_model_summary_pro("Stacking (LASSO)", stacking_lasso_pro_results)
#>
#> --- Stacking (LASSO) Prognosis Model (on Training Data) Metrics ---
#> Ensemble Type: Stacking (Meta: lasso_pro, Base models used: rsf_pro, lasso_pro)
#> C-index: 0.8814
#> Time-dependent AUROC (years 1, 3, 5): 0.7389, 0.8173, 0.8510
#> Average Time-dependent AUROC: 0.8024
#> KM Group HR (High vs Low): 19.5197 (p-value: 3.159e-18, Cutoff: 18.0175)
#> --------------------------------------------------
4. Applying Models to New Data (apply_pro)
Generate prognostic scores for a new dataset.
# Apply the trained stacking model to the test set
pro_pred_new <- apply_pro(
trained_model_object = stacking_lasso_pro_results$model_object,
new_data = test_pro,
time_unit = "day"
)
#> Applying model on new data...
# Evaluate the new prognostic scores
eval_pro_new <- evaluate_predictions_pro(
prediction_df = pro_pred_new,
years_to_evaluate = c(1,3, 5)
)
print(eval_pro_new)
#> $C_index
#> [1] 0.5778903
#>
#> $AUROC_Years
#> $AUROC_Years$`1`
#> [1] 0.4968283
#>
#> $AUROC_Years$`3`
#> [1] 0.5527718
#>
#> $AUROC_Years$`5`
#> [1] 0.566955
#>
#>
#> $AUROC_Average
#> [1] 0.5388517
#>
#> $KM_HR
#> [1] 1.862771
#>
#> $KM_P_value
#> [1] 0.04657938
#>
#> $KM_Cutoff
#> [1] 17.41586
5. Visualization (figure_pro)
Generate Kaplan-Meier (KM) and time-dependent ROC (tdROC) curves.
# Kaplan-Meier Curve
figure_pro(type = "km", data = stacking_lasso_pro_results, time_unit= "days")
# Time-Dependent ROC Curve
figure_pro(type = "tdroc", data = stacking_lasso_pro_results, time_unit = "days")