Section 4 Machine learning model

library(randomForest)
#' Fit and evaluate class-balanced random forests over repeated 3-fold splits
#'
#' For each of the first 5 entries of `all_splits`, every one of its 3 folds
#' is held out once while a random forest is fitted on the remaining folds.
#' Each fitted forest is saved to `./models/<setup_name>_RF_split<s>_iter<i>_tug.RData`
#' (object name `rf`), and the held-out class probabilities of all folds of a
#' split are written to `models/split<s>_<setup_name>_RF_predictions_tug.csv`.
#'
#' `setup_name` is part of every output file name, so runs for different
#' setups no longer overwrite each other. With `setup_name = "PD_PDism"` the
#' file names are identical to the ones this function has always produced.
#'
#' @param data Not read by this function. Kept so existing calls of the form
#'   `build_models(data, all_splits, name)` continue to work; the training
#'   frames are assembled directly from the folds.
#' @param all_splits List whose elements 1..5 are each a list of 3 data
#'   frames (folds). Every fold must contain the columns `PDGP` (carried
#'   through to the predictions, never used as a predictor) and
#'   `response_var` (the class label).
#' @param setup_name Single string identifying the setup; used as a tag in
#'   the output file names.
#' @return `NULL`, invisibly. Called for its side effects (files under
#'   `./models`).
build_models <- function(data, all_splits, setup_name) {
    stopifnot(
        is.character(setup_name),
        length(setup_name) == 1L,
        !is.na(setup_name)
    )

    num_splits <- 5
    num_folds <- 3
    fold_ids <- seq_len(num_folds)

    # Create the output directory up front so a missing folder cannot make
    # save() fail only after an expensive forest has been fitted.
    dir.create("./models", showWarnings = FALSE, recursive = TRUE)

    for (split_num in seq_len(num_splits)) {
        split <- all_splits[[split_num]]
        fold_preds <- vector("list", num_folds)

        for (i in fold_ids) {
            test <- split[[i]]
            # Training set = every fold of this split except the held-out one.
            train <- do.call(rbind, unname(split[fold_ids[fold_ids != i]]))

            stopifnot(
                all(c("PDGP", "response_var") %in% names(train)),
                "PDGP" %in% names(test)
            )

            # PDGP is set aside and re-attached to the predictions below; it
            # must not be offered to the forest as a predictor.
            test_PDGP <- test[["PDGP"]]
            train <- train[, names(train) != "PDGP", drop = FALSE]
            test <- test[, names(test) != "PDGP", drop = FALSE]

            response_var <- train[["response_var"]]
            predictor_vars <- train[, names(train) != "response_var",
                drop = FALSE]

            # Draw the same number of cases from every class for each tree:
            # the size of the smallest class.
            min_size <- min(table(response_var))
            num_classes <- length(unique(response_var))
            sampsize <- rep(min_size, num_classes)

            # The object must stay named `rf`: save() stores it under that
            # name, which is what a later load() of the file will restore.
            rf <- randomForest(x = predictor_vars, y = response_var,
                ntree = 1e+05, importance = TRUE, sampsize = sampsize,
                proximity = TRUE)
            save(rf, file = paste0("./models/", setup_name, "_RF_split",
                split_num, "_iter", i, "_tug.RData"))

            preds <- as.data.frame(predict(rf, test, type = "prob"))
            preds$PDGP <- test_PDGP
            # NOTE(review): the predicted label compares the first two
            # probability columns and is hard-coded to "PD"/"PDism"; this
            # presumes those are the class levels, in that order - confirm
            # for every setup.
            preds$response <- factor(ifelse(preds[, 1] > preds[, 2],
                "PD", "PDism"))
            fold_preds[[i]] <- preds
        }

        split_preds <- do.call(rbind, fold_preds)
        write.csv(split_preds, file = paste0("models/split", split_num,
            "_", setup_name, "_RF_predictions_tug.csv"), row.names = FALSE)
    }

    invisible(NULL)
}

4.1 PD and parkinsonism participants

# Restore the saved workspace objects for this setup, then train and evaluate.
# The .RData file is presumably what defines `data` and `all_splits` - confirm.
load(file = "./rdata/var_reduct_PD_PDism_splits_tug.RData")
build_models(data = data, all_splits = all_splits, setup_name = "PD_PDism")

4.2 Mild PD and parkinsonism participants

# Restore the saved workspace objects for this setup, then train and evaluate.
# The .RData file is presumably what defines `data` and `all_splits` - confirm.
# NOTE(review): the section heading says "Mild PD" while the file and label
# say "HY_early" - confirm these refer to the same group.
load(file = "./rdata/var_reduct_HY_early_PDism_splits_tug.RData")
build_models(data = data, all_splits = all_splits, setup_name = "HY_early_PDism")

4.3 Moderate PD and parkinsonism participants

# Restore the saved workspace objects for this setup, then train and evaluate.
# The .RData file is presumably what defines `data` and `all_splits` - confirm.
# NOTE(review): the section heading says "Moderate PD" while the file and
# label say "HY_mild" - confirm these refer to the same group.
load(file = "./rdata/var_reduct_HY_mild_PDism_splits_tug.RData")
build_models(data = data, all_splits = all_splits, setup_name = "HY_mild_PDism")

4.4 Severe PD and parkinsonism participants

# Restore the saved workspace objects for this setup, then train and evaluate.
# The .RData file is presumably what defines `data` and `all_splits` - confirm.
load(file = "./rdata/var_reduct_HY_severe_PDism_splits_tug.RData")
build_models(data = data, all_splits = all_splits, setup_name = "HY_severe_PDism")