library(stringr)
library(DescTools)
library(pROC)
library(MLmetrics)
library(caret)
library(pROC)
library(kableExtra)
balanced_accuracy <- function(y_pred, y_true) {
return(mean(c(sensitivity(y_pred, y_true), specificity(y_pred,
y_true)), na.rm = TRUE))
}
PD and parkinsonism participants
load("./rdata/var_reduct_PD_PDism_splits_tug.RData")
data$response_var = factor(ifelse(data$response_var == 1, "PD",
"PDism"))
sl_preds = read.csv("./models/split1_PD_PDism_RF_predictions_3folds_tug.csv")
sl_preds <- sl_preds[order(sl_preds$PDGP), ]
PDGP = sl_preds$PDGP
sl_preds = subset(sl_preds, select = -PDGP)
num_splits = 5
all_sl_preds = sl_preds
colnames(all_sl_preds) = paste0(colnames(sl_preds), "_split1")
for (i in 2:num_splits) {
sl_preds = read.csv(paste0("./models/split", i, "_PD_PDism_RF_predictions_3folds_tug.csv"))
sl_preds <- sl_preds[order(sl_preds$PDGP), ]
sl_preds = subset(sl_preds, select = -PDGP)
colnames(sl_preds) = paste0(colnames(sl_preds), "_split",
i)
all_sl_preds = cbind(all_sl_preds, sl_preds)
}
all_sl_pred_cls = all_sl_preds[, str_detect(colnames(all_sl_preds),
"response")]
all_sl_pred_cls$predict_mode = apply(all_sl_pred_cls, 1, function(x) {
uniqx <- unique(na.omit(x))
uniqx[which.max(tabulate(match(x, uniqx)))]
})
conf = caret::confusionMatrix(data = factor(all_sl_pred_cls$predict_mode,
levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
"PDism")))
knitr::kable(conf$table)
|
|
PD
|
PDism
|
|
PD
|
230
|
4
|
|
PDism
|
30
|
14
|
cat("Balanced accuracy = ", round(balanced_accuracy(factor(all_sl_pred_cls$predict_mode,
levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
"PDism"))) * 100, 2), "%\n")
## Balanced accuracy = 83.12 %
cat("F1_score = ", F1_Score(factor(all_sl_pred_cls$predict_mode,
levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
"PDism"))), "\n")
## F1_score = 0.9311741
auc = auc(as.numeric(data$response_var), as.numeric(factor(all_sl_pred_cls$predict_mode)))
print(auc)
## Area under the curve: 0.8312
Mild PD and parkinsonism participants
load("./rdata/var_reduct_HY_early_PDism_splits_tug.RData")
data$response_var = factor(ifelse(data$response_var == 1, "PD",
"PDism"))
sl_preds = read.csv("./models/split1_HY_early_PDism_RF_predictions_3folds_tug.csv")
sl_preds <- sl_preds[order(sl_preds$PDGP), ]
PDGP = sl_preds$PDGP
sl_preds = subset(sl_preds, select = -PDGP)
num_splits = 5
all_sl_preds = sl_preds
colnames(all_sl_preds) = paste0(colnames(sl_preds), "_split1")
for (i in 2:num_splits) {
sl_preds = read.csv(paste0("./models/split", i, "_HY_early_PDism_RF_predictions_3folds_tug.csv"))
sl_preds <- sl_preds[order(sl_preds$PDGP), ]
sl_preds = subset(sl_preds, select = -PDGP)
colnames(sl_preds) = paste0(colnames(sl_preds), "_split",
i)
all_sl_preds = cbind(all_sl_preds, sl_preds)
}
all_sl_pred_cls = all_sl_preds[, str_detect(colnames(all_sl_preds),
"response")]
all_sl_pred_cls$predict_mode = apply(all_sl_pred_cls, 1, function(x) {
uniqx <- unique(na.omit(x))
uniqx[which.max(tabulate(match(x, uniqx)))]
})
conf = caret::confusionMatrix(data = factor(all_sl_pred_cls$predict_mode,
levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
"PDism")))
knitr::kable(conf$table)
|
|
PD
|
PDism
|
|
PD
|
169
|
0
|
|
PDism
|
15
|
6
|
cat("Balanced accuracy = ", round(balanced_accuracy(factor(all_sl_pred_cls$predict_mode,
levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
"PDism"))) * 100, 2), "%\n")
## Balanced accuracy = 95.92 %
cat("F1_score = ", F1_Score(factor(all_sl_pred_cls$predict_mode,
levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
"PDism"))), "\n")
## F1_score = 0.9575071
auc = auc(as.numeric(data$response_var), as.numeric(factor(all_sl_pred_cls$predict_mode)))
print(auc)
## Area under the curve: 0.9592
Moderate PD and parkinsonism participants
load("./rdata/var_reduct_HY_mild_PDism_splits_tug.RData")
data$response_var = factor(ifelse(data$response_var == 1, "PD",
"PDism"))
sl_preds = read.csv("./models/split1_HY_mild_PDism_RF_predictions_3folds_tug.csv")
sl_preds <- sl_preds[order(sl_preds$PDGP), ]
PDGP = sl_preds$PDGP
sl_preds = subset(sl_preds, select = -PDGP)
num_splits = 5
all_sl_preds = sl_preds
colnames(all_sl_preds) = paste0(colnames(sl_preds), "_split1")
for (i in 2:num_splits) {
sl_preds = read.csv(paste0("./models/split", i, "_HY_mild_PDism_RF_predictions_3folds_tug.csv"))
sl_preds <- sl_preds[order(sl_preds$PDGP), ]
sl_preds = subset(sl_preds, select = -PDGP)
colnames(sl_preds) = paste0(colnames(sl_preds), "_split",
i)
all_sl_preds = cbind(all_sl_preds, sl_preds)
}
all_sl_pred_cls = all_sl_preds[, str_detect(colnames(all_sl_preds),
"response")]
all_sl_pred_cls$predict_mode = apply(all_sl_pred_cls, 1, function(x) {
uniqx <- unique(na.omit(x))
uniqx[which.max(tabulate(match(x, uniqx)))]
})
conf = caret::confusionMatrix(data = factor(all_sl_pred_cls$predict_mode,
levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
"PDism")))
knitr::kable(conf$table)
|
|
PD
|
PDism
|
|
PD
|
56
|
1
|
|
PDism
|
3
|
7
|
cat("Balanced accuracy = ", round(balanced_accuracy(factor(all_sl_pred_cls$predict_mode,
levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
"PDism"))) * 100, 2), "%\n")
## Balanced accuracy = 91.21 %
cat("F1_score = ", F1_Score(factor(all_sl_pred_cls$predict_mode,
levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
"PDism"))), "\n")
## F1_score = 0.9655172
auc = auc(as.numeric(data$response_var), as.numeric(factor(all_sl_pred_cls$predict_mode)))
print(auc)
## Area under the curve: 0.9121
Severe PD and parkinsonism participants
load("./rdata/var_reduct_HY_severe_PDism_splits_tug.RData")
data$response_var = factor(ifelse(data$response_var == 1, "PD",
"PDism"))
sl_preds = read.csv("./models/split1_HY_severe_PDism_RF_predictions_3folds_tug.csv")
sl_preds <- sl_preds[order(sl_preds$PDGP), ]
PDGP = sl_preds$PDGP
sl_preds = subset(sl_preds, select = -PDGP)
num_splits = 5
all_sl_preds = sl_preds
colnames(all_sl_preds) = paste0(colnames(sl_preds), "_split1")
for (i in 2:num_splits) {
sl_preds = read.csv(paste0("./models/split", i, "_HY_severe_PDism_RF_predictions_3folds_tug.csv"))
sl_preds <- sl_preds[order(sl_preds$PDGP), ]
sl_preds = subset(sl_preds, select = -PDGP)
colnames(sl_preds) = paste0(colnames(sl_preds), "_split",
i)
all_sl_preds = cbind(all_sl_preds, sl_preds)
}
all_sl_pred_cls = all_sl_preds[, str_detect(colnames(all_sl_preds),
"response")]
all_sl_pred_cls$predict_mode = apply(all_sl_pred_cls, 1, function(x) {
uniqx <- unique(na.omit(x))
uniqx[which.max(tabulate(match(x, uniqx)))]
})
conf = caret::confusionMatrix(data = factor(all_sl_pred_cls$predict_mode,
levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
"PDism")))
knitr::kable(conf$table)
|
|
PD
|
PDism
|
|
PD
|
17
|
0
|
|
PDism
|
0
|
4
|
cat("Balanced accuracy = ", round(balanced_accuracy(factor(all_sl_pred_cls$predict_mode,
levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
"PDism"))) * 100, 2), "%\n")
## Balanced accuracy = 100 %
cat("F1_score = ", F1_Score(factor(all_sl_pred_cls$predict_mode,
levels = c("PD", "PDism")), factor(data$response_var, levels = c("PD",
"PDism"))), "\n")
## F1_score = 1
auc = auc(as.numeric(data$response_var), as.numeric(factor(all_sl_pred_cls$predict_mode)))
print(auc)
## Area under the curve: 1