Section 5 Performance evaluation

library(stringr)
library(DescTools)
library(pROC)
library(MLmetrics)
library(caret)
library(pROC)
library(kableExtra)

balanced_accuracy <- function(y_pred, y_true) {
    return(mean(c(sensitivity(y_pred, y_true), specificity(y_pred,
        y_true)), na.rm = TRUE))
}

5.1 PD and parkinsonism participants

load("./rdata/var_reduct_PD_PDism_splits_tug.RData")
data$response_var = factor(ifelse(data$response_var == 1, "PD",
    "PDism"))

sl_preds = read.csv("./models/split1_PD_PDism_RF_predictions_3folds_tug.csv")
sl_preds <- sl_preds[order(sl_preds$PDGP), ]
PDGP = sl_preds$PDGP
sl_preds = subset(sl_preds, select = -PDGP)

num_splits = 5
all_sl_preds = sl_preds
colnames(all_sl_preds) = paste0(colnames(sl_preds), "_split1")
for (i in 2:num_splits) {
    sl_preds = read.csv(paste0("./models/split", i, "_PD_PDism_RF_predictions_3folds_tug.csv"))
    sl_preds <- sl_preds[order(sl_preds$PDGP), ]
    sl_preds = subset(sl_preds, select = -PDGP)
    colnames(sl_preds) = paste0(colnames(sl_preds), "_split",
        i)
    all_sl_preds = cbind(all_sl_preds, sl_preds)
}
all_sl_pred_cls = all_sl_preds[, str_detect(colnames(all_sl_preds),
    "response")]
all_sl_pred_cls$predict_mode = apply(all_sl_pred_cls, 1, function(x) {
    uniqx <- unique(na.omit(x))
    uniqx[which.max(tabulate(match(x, uniqx)))]
})

conf = caret::confusionMatrix(data = factor(all_sl_pred_cls$predict_mode,
    levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
    "PDism")))
knitr::kable(conf$table)
PD PDism
PD 230 4
PDism 30 14
cat("Balanced accuracy = ", round(balanced_accuracy(factor(all_sl_pred_cls$predict_mode,
    levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
    "PDism"))) * 100, 2), "%\n")
## Balanced accuracy =  83.12 %
cat("F1_score = ", F1_Score(factor(all_sl_pred_cls$predict_mode,
    levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
    "PDism"))), "\n")
## F1_score =  0.9311741
auc = auc(as.numeric(data$response_var), as.numeric(factor(all_sl_pred_cls$predict_mode)))
print(auc)
## Area under the curve: 0.8312

5.2 Mild PD and parkinsonism participants

load("./rdata/var_reduct_HY_early_PDism_splits_tug.RData")
data$response_var = factor(ifelse(data$response_var == 1, "PD",
    "PDism"))

sl_preds = read.csv("./models/split1_HY_early_PDism_RF_predictions_3folds_tug.csv")
sl_preds <- sl_preds[order(sl_preds$PDGP), ]
PDGP = sl_preds$PDGP
sl_preds = subset(sl_preds, select = -PDGP)

num_splits = 5
all_sl_preds = sl_preds
colnames(all_sl_preds) = paste0(colnames(sl_preds), "_split1")
for (i in 2:num_splits) {
    sl_preds = read.csv(paste0("./models/split", i, "_HY_early_PDism_RF_predictions_3folds_tug.csv"))
    sl_preds <- sl_preds[order(sl_preds$PDGP), ]
    sl_preds = subset(sl_preds, select = -PDGP)
    colnames(sl_preds) = paste0(colnames(sl_preds), "_split",
        i)
    all_sl_preds = cbind(all_sl_preds, sl_preds)
}
all_sl_pred_cls = all_sl_preds[, str_detect(colnames(all_sl_preds),
    "response")]
all_sl_pred_cls$predict_mode = apply(all_sl_pred_cls, 1, function(x) {
    uniqx <- unique(na.omit(x))
    uniqx[which.max(tabulate(match(x, uniqx)))]
})

conf = caret::confusionMatrix(data = factor(all_sl_pred_cls$predict_mode,
    levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
    "PDism")))
knitr::kable(conf$table)
PD PDism
PD 169 0
PDism 15 6
cat("Balanced accuracy = ", round(balanced_accuracy(factor(all_sl_pred_cls$predict_mode,
    levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
    "PDism"))) * 100, 2), "%\n")
## Balanced accuracy =  95.92 %
cat("F1_score = ", F1_Score(factor(all_sl_pred_cls$predict_mode,
    levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
    "PDism"))), "\n")
## F1_score =  0.9575071
auc = auc(as.numeric(data$response_var), as.numeric(factor(all_sl_pred_cls$predict_mode)))
print(auc)
## Area under the curve: 0.9592

5.3 Moderate PD and parkinsonism participants

load("./rdata/var_reduct_HY_mild_PDism_splits_tug.RData")
data$response_var = factor(ifelse(data$response_var == 1, "PD",
    "PDism"))

sl_preds = read.csv("./models/split1_HY_mild_PDism_RF_predictions_3folds_tug.csv")
sl_preds <- sl_preds[order(sl_preds$PDGP), ]
PDGP = sl_preds$PDGP
sl_preds = subset(sl_preds, select = -PDGP)

num_splits = 5
all_sl_preds = sl_preds
colnames(all_sl_preds) = paste0(colnames(sl_preds), "_split1")
for (i in 2:num_splits) {
    sl_preds = read.csv(paste0("./models/split", i, "_HY_mild_PDism_RF_predictions_3folds_tug.csv"))
    sl_preds <- sl_preds[order(sl_preds$PDGP), ]
    sl_preds = subset(sl_preds, select = -PDGP)
    colnames(sl_preds) = paste0(colnames(sl_preds), "_split",
        i)
    all_sl_preds = cbind(all_sl_preds, sl_preds)
}
all_sl_pred_cls = all_sl_preds[, str_detect(colnames(all_sl_preds),
    "response")]
all_sl_pred_cls$predict_mode = apply(all_sl_pred_cls, 1, function(x) {
    uniqx <- unique(na.omit(x))
    uniqx[which.max(tabulate(match(x, uniqx)))]
})

conf = caret::confusionMatrix(data = factor(all_sl_pred_cls$predict_mode,
    levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
    "PDism")))
knitr::kable(conf$table)
PD PDism
PD 56 1
PDism 3 7
cat("Balanced accuracy = ", round(balanced_accuracy(factor(all_sl_pred_cls$predict_mode,
    levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
    "PDism"))) * 100, 2), "%\n")
## Balanced accuracy =  91.21 %
cat("F1_score = ", F1_Score(factor(all_sl_pred_cls$predict_mode,
    levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
    "PDism"))), "\n")
## F1_score =  0.9655172
auc = auc(as.numeric(data$response_var), as.numeric(factor(all_sl_pred_cls$predict_mode)))
print(auc)
## Area under the curve: 0.9121

5.4 Severe PD and parkinsonism participants

load("./rdata/var_reduct_HY_severe_PDism_splits_tug.RData")
data$response_var = factor(ifelse(data$response_var == 1, "PD",
    "PDism"))

sl_preds = read.csv("./models/split1_HY_severe_PDism_RF_predictions_3folds_tug.csv")
sl_preds <- sl_preds[order(sl_preds$PDGP), ]
PDGP = sl_preds$PDGP
sl_preds = subset(sl_preds, select = -PDGP)

num_splits = 5
all_sl_preds = sl_preds
colnames(all_sl_preds) = paste0(colnames(sl_preds), "_split1")
for (i in 2:num_splits) {
    sl_preds = read.csv(paste0("./models/split", i, "_HY_severe_PDism_RF_predictions_3folds_tug.csv"))
    sl_preds <- sl_preds[order(sl_preds$PDGP), ]
    sl_preds = subset(sl_preds, select = -PDGP)
    colnames(sl_preds) = paste0(colnames(sl_preds), "_split",
        i)
    all_sl_preds = cbind(all_sl_preds, sl_preds)
}
all_sl_pred_cls = all_sl_preds[, str_detect(colnames(all_sl_preds),
    "response")]
all_sl_pred_cls$predict_mode = apply(all_sl_pred_cls, 1, function(x) {
    uniqx <- unique(na.omit(x))
    uniqx[which.max(tabulate(match(x, uniqx)))]
})

conf = caret::confusionMatrix(data = factor(all_sl_pred_cls$predict_mode,
    levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
    "PDism")))
knitr::kable(conf$table)
PD PDism
PD 17 0
PDism 0 4
cat("Balanced accuracy = ", round(balanced_accuracy(factor(all_sl_pred_cls$predict_mode,
    levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
    "PDism"))) * 100, 2), "%\n")
## Balanced accuracy =  100 %
cat("F1_score = ", F1_Score(factor(all_sl_pred_cls$predict_mode,
    levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
    "PDism"))), "\n")
## F1_score =  1
auc = auc(as.numeric(data$response_var), as.numeric(factor(all_sl_pred_cls$predict_mode)))
print(auc)
## Area under the curve: 1