#' Train class-balanced random forests over repeated k-fold splits
#'
#' For each of the first `num_splits` elements of `all_splits`, every fold is
#' held out once as the test set while the remaining folds are row-bound into
#' the training set. A forest is fitted on the training set, saved to disk,
#' and used to predict class probabilities for the held-out fold. The
#' predictions of all folds of a split are written to one CSV file.
#'
#' Files written (relative to the working directory unless `out_dir` is
#' absolute):
#' * `<out_dir>/PD_PDism_RF_split<s>_iter<i>_tug.RData` - the fitted model,
#'   stored under the object name `rf`.
#' * `<out_dir>/split<s>_PD_PDism_RF_predictions_tug.csv` - the probability
#'   columns returned by `predict()`, followed by `PDGP` and `response`.
#'
#' @param data Unused by the current implementation; kept so existing callers
#'   keep working.
#' @param all_splits List of splits. Each split is a list of at least
#'   `num_folds` data frames, each containing the columns `PDGP` and
#'   `response_var`. `response_var` is passed as `y` to `randomForest()`
#'   (presumably a factor with the levels PD and PDism - TODO confirm).
#' @param setup_name Unused by the current implementation; kept so existing
#'   callers keep working.
#' @param num_splits Number of leading elements of `all_splits` to process.
#' @param num_folds Number of folds used from each split.
#' @param ntree Number of trees passed to `randomForest()`.
#' @param out_dir Directory receiving the model and prediction files. It is
#'   created if it does not exist.
#' @return `NULL`, invisibly. The function is called for its side effects.
build_models <- function(data, all_splits, setup_name,
                         num_splits = 5, num_folds = 3,
                         ntree = 1e+05, out_dir = "models") {
  # Validate everything before any model is trained: a single fit with the
  # default `ntree` is expensive, so a malformed input should fail now rather
  # than part-way through the run.
  if (num_folds < 2) {
    stop("`num_folds` must be at least 2.", call. = FALSE)
  }
  if (length(all_splits) < num_splits) {
    stop(
      "`all_splits` has ", length(all_splits), " element(s) but ",
      "`num_splits` is ", num_splits, ".",
      call. = FALSE
    )
  }
  required_cols <- c("PDGP", "response_var")
  for (split_num in seq_len(num_splits)) {
    split <- all_splits[[split_num]]
    if (length(split) < num_folds) {
      stop(
        "Split ", split_num, " has ", length(split), " fold(s) but ",
        "`num_folds` is ", num_folds, ".",
        call. = FALSE
      )
    }
    for (i in seq_len(num_folds)) {
      missing_cols <- setdiff(required_cols, names(split[[i]]))
      if (length(missing_cols) > 0) {
        stop(
          "Split ", split_num, ", fold ", i, " is missing column(s): ",
          paste(missing_cols, collapse = ", "), ".",
          call. = FALSE
        )
      }
    }
  }

  # Neither save() nor write.csv() creates the target directory.
  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

  fold_ids <- seq_len(num_folds)

  for (split_num in seq_len(num_splits)) {
    split <- all_splits[[split_num]]
    fold_preds <- vector("list", num_folds)

    for (i in fold_ids) {
      test <- split[[i]]
      # Training set: every fold except the held-out one, bound in one call.
      train_folds <- lapply(setdiff(fold_ids, i), function(k) split[[k]])
      train <- do.call(rbind, train_folds)

      # PDGP is not a predictor; it is only carried through to the output.
      test_PDGP <- test$PDGP
      train <- train[, setdiff(names(train), "PDGP"), drop = FALSE]
      test <- test[, setdiff(names(test), "PDGP"), drop = FALSE]

      predictor_vars <-
        train[, setdiff(names(train), "response_var"), drop = FALSE]
      response_var <- train$response_var

      # Balance the classes: draw the same number of cases from every class,
      # equal to the size of the smallest class in the training set.
      min_size <- min(table(response_var))
      num_classes <- length(unique(response_var))
      sampsize <- rep(min_size, num_classes)

      # The object must stay named `rf`: save() stores objects by name, and
      # code that load()s these files will look for `rf`.
      rf <- randomForest(
        x = predictor_vars, y = response_var,
        ntree = ntree, importance = TRUE, sampsize = sampsize,
        proximity = TRUE
      )
      save(rf, file = file.path(
        out_dir,
        paste0("PD_PDism_RF_split", split_num, "_iter", i, "_tug.RData")
      ))

      preds <- as.data.frame(predict(rf, test, type = "prob"))
      preds$PDGP <- test_PDGP
      # NOTE(review): this labelling assumes the first probability column is
      # the PD class and the second is PDism - confirm against the factor
      # levels of `response_var`. Ties are labelled "PDism".
      preds$response <- factor(
        ifelse(preds[, 1] > preds[, 2], "PD", "PDism")
      )
      fold_preds[[i]] <- preds
    }

    split_preds <- do.call(rbind, fold_preds)
    write.csv(
      split_preds,
      file = file.path(
        out_dir,
        paste0("split", split_num, "_PD_PDism_RF_predictions_tug.csv")
      ),
      row.names = FALSE
    )
  }

  invisible(NULL)
}