Section 3 Train/Test split

# No fixed seed is supplied: set.seed(NULL) re-initialises the RNG as if no
# seed had been set, so the random splits below differ from run to run.
set.seed(NULL)

# Load a saved workspace image; the objects it defines are not visible here.
load("../rdata/sensor_features_all_tasks.RData")

# Numeric ids; not referenced anywhere else in this section.
# NOTE(review): presumably participant ids flagged as parkinsonism - confirm.
parkinsonism_ids <- c(
    38, 168, 194, 199, 207, 212, 214, 223, 224, 235,
    241, 252, 259, 261, 263, 265, 268, 297, 298
)

# Split two groups into k folds, stratified by group size.
#
# Each of the first k - 1 folds receives floor(nrow(group) / k) randomly
# chosen rows (without replacement) from each group; the last fold receives
# every row that is left, so it absorbs the remainders. Within a fold the
# rows of group1 come first, followed by the rows of group2.
#
# Arguments:
#   group1, group2  data frames with identical columns.
#   k               number of folds (default 5, the original behaviour).
#
# Returns:
#   A list of k data frames; every input row appears in exactly one fold.
create_groups <- function(group1, group2, k = 5) {
    stopifnot(is.numeric(k), length(k) == 1, !is.na(k), k >= 1)
    k <- as.integer(k)

    # Draw `size` positions from `pool` and return the chosen row indices
    # together with the remaining pool (original order preserved).
    draw <- function(pool, size) {
        if (size == 0L) {
            # pool[-integer(0)] would select nothing, so skip the draw.
            return(list(chosen = integer(0), pool = pool))
        }
        pick <- sample.int(length(pool), size)
        list(chosen = pool[pick], pool = pool[-pick])
    }

    size1 <- nrow(group1) %/% k
    size2 <- nrow(group2) %/% k

    # Track the not-yet-assigned rows by index rather than by row name.
    pool1 <- seq_len(nrow(group1))
    pool2 <- seq_len(nrow(group2))

    groups <- vector("list", k)
    for (i in seq_len(k - 1L)) {
        # group1 is sampled before group2 so the RNG is consumed in the
        # same order as before.
        d1 <- draw(pool1, size1)
        pool1 <- d1$pool
        d2 <- draw(pool2, size2)
        pool2 <- d2$pool

        # drop = FALSE keeps single-column inputs as data frames.
        groups[[i]] <- rbind(
            group1[d1$chosen, , drop = FALSE],
            group2[d2$chosen, , drop = FALSE]
        )
    }

    # Whatever is left over forms the final fold.
    groups[[k]] <- rbind(
        group1[pool1, , drop = FALSE],
        group2[pool2, , drop = FALSE]
    )
    groups
}

3.1 PD participants and controls

# Build the PD-vs-control data set, draw five independent stratified
# splits with create_groups(), and write every train/test pair to CSV.
load("./rdata/PD_control_seg.RData")
data <- sensor_df

# Binary response: 1 by default, 0 where PDGP exceeds 600.
# NOTE(review): presumably PDGP > 600 identifies healthy controls (those
# rows become HC_data below) - confirm against the cohort coding.
labels <- rep(1, nrow(data))
labels[data$PDGP > 600] <- 0

# Keep the columns named in the row names of rf[["importance"]], plus PDGP.
# `rf` is not created in this section, so it must already be in the
# workspace or come from a loaded .RData file.
# drop = FALSE keeps a data frame even if only one column matches.
# NOTE(review): PDGP defines the response and stays in the exported files -
# confirm downstream modelling does not use it as a predictor.
data <- data[, colnames(data) %in% c(rownames(rf[["importance"]]), "PDGP"),
    drop = FALSE]
data$response <- as.factor(labels)

PD_data <- data[data$response == 1, ]
HC_data <- data[data$response == 0, ]

# Repeat with five different shuffles; each element is one list of folds.
all_splits <- replicate(5, create_groups(PD_data, HC_data),
    simplify = FALSE)
save(data, all_splits, file = "./rdata/var_reduct_PD_control_splits.RData")

# Write to csv files: fold i is the test set and the remaining folds are
# stacked into the training set.
out_dir <- "./files/PD_HC/train_test_files"
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
for (split_num in seq_along(all_splits)) {
    folds <- all_splits[[split_num]]
    for (i in seq_along(folds)) {
        test <- folds[[i]]
        # One rbind over all training folds instead of growing a frame
        # fold by fold.
        train <- do.call(rbind, folds[-i])
        file_tag <- paste0("_split", split_num, "_iter", i, ".csv")
        write.csv(train, file.path(out_dir, paste0("train", file_tag)),
            row.names = FALSE)
        write.csv(test, file.path(out_dir, paste0("test", file_tag)),
            row.names = FALSE)
    }
}

3.2 Mild PD participants and controls

# Build the HY-early-vs-control data set, draw five independent stratified
# splits with create_groups(), and write every train/test pair to CSV.
# NOTE(review): the heading calls this cohort "Mild PD" while the inputs
# are named "early" - confirm the intended disease stage.
load("./rdata/HY_control_early.RData")
data <- sensor_df_early

# Binary response: 1 by default, 0 where PDGP exceeds 600.
# NOTE(review): presumably PDGP > 600 identifies healthy controls (those
# rows become HC_data below) - confirm against the cohort coding.
labels <- rep(1, nrow(data))
labels[data$PDGP > 600] <- 0

# Keep the columns named in the row names of rf[["importance"]], plus PDGP.
# `rf` is not created in this section, so it must already be in the
# workspace or come from a loaded .RData file.
# drop = FALSE keeps a data frame even if only one column matches.
# NOTE(review): PDGP defines the response and stays in the exported files -
# confirm downstream modelling does not use it as a predictor.
data <- data[, colnames(data) %in% c(rownames(rf[["importance"]]), "PDGP"),
    drop = FALSE]
data$response <- as.factor(labels)

PD_data <- data[data$response == 1, ]
HC_data <- data[data$response == 0, ]

# Repeat with five different shuffles; each element is one list of folds.
all_splits <- replicate(5, create_groups(PD_data, HC_data),
    simplify = FALSE)
save(data, all_splits, file = "./rdata/var_reduct_HY_early_HC_splits.RData")

# Write to csv files: fold i is the test set and the remaining folds are
# stacked into the training set.
out_dir <- "./files/HY_early_HC/train_test_files"
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
for (split_num in seq_along(all_splits)) {
    folds <- all_splits[[split_num]]
    for (i in seq_along(folds)) {
        test <- folds[[i]]
        # One rbind over all training folds instead of growing a frame
        # fold by fold.
        train <- do.call(rbind, folds[-i])
        file_tag <- paste0("_split", split_num, "_iter", i, ".csv")
        write.csv(train, file.path(out_dir, paste0("train", file_tag)),
            row.names = FALSE)
        write.csv(test, file.path(out_dir, paste0("test", file_tag)),
            row.names = FALSE)
    }
}

3.3 Moderate PD participants and controls

# Build the HY-mild-vs-control data set, draw five independent stratified
# splits with create_groups(), and write every train/test pair to CSV.
# NOTE(review): the heading calls this cohort "Moderate PD" while the
# inputs are named "mild" - confirm the intended disease stage.
load("./rdata/HY_control_mild.RData")
data <- sensor_df_mild

# Binary response: 1 by default, 0 where PDGP exceeds 600.
# NOTE(review): presumably PDGP > 600 identifies healthy controls (those
# rows become HC_data below) - confirm against the cohort coding.
labels <- rep(1, nrow(data))
labels[data$PDGP > 600] <- 0

# Keep the columns named in the row names of rf[["importance"]], plus PDGP.
# `rf` is not created in this section, so it must already be in the
# workspace or come from a loaded .RData file.
# drop = FALSE keeps a data frame even if only one column matches.
# NOTE(review): PDGP defines the response and stays in the exported files -
# confirm downstream modelling does not use it as a predictor.
data <- data[, colnames(data) %in% c(rownames(rf[["importance"]]), "PDGP"),
    drop = FALSE]
data$response <- as.factor(labels)

PD_data <- data[data$response == 1, ]
HC_data <- data[data$response == 0, ]

# Repeat with five different shuffles; each element is one list of folds.
all_splits <- replicate(5, create_groups(PD_data, HC_data),
    simplify = FALSE)
save(data, all_splits, file = "./rdata/var_reduct_HY_mild_HC_splits.RData")

# Write to csv files: fold i is the test set and the remaining folds are
# stacked into the training set.
out_dir <- "./files/HY_mild_HC/train_test_files"
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
for (split_num in seq_along(all_splits)) {
    folds <- all_splits[[split_num]]
    for (i in seq_along(folds)) {
        test <- folds[[i]]
        # One rbind over all training folds instead of growing a frame
        # fold by fold.
        train <- do.call(rbind, folds[-i])
        file_tag <- paste0("_split", split_num, "_iter", i, ".csv")
        write.csv(train, file.path(out_dir, paste0("train", file_tag)),
            row.names = FALSE)
        write.csv(test, file.path(out_dir, paste0("test", file_tag)),
            row.names = FALSE)
    }
}

3.4 Severe PD participants and controls

# Build the HY-severe-vs-control data set, draw five independent stratified
# splits with create_groups(), and write every train/test pair to CSV.
load("./rdata/HY_control_severe.RData")
data <- sensor_df_severe

# Binary response: 1 by default, 0 where PDGP exceeds 600.
# NOTE(review): presumably PDGP > 600 identifies healthy controls (those
# rows become HC_data below) - confirm against the cohort coding.
labels <- rep(1, nrow(data))
labels[data$PDGP > 600] <- 0

# Keep the columns named in the row names of rf[["importance"]], plus PDGP.
# `rf` is not created in this section, so it must already be in the
# workspace or come from a loaded .RData file.
# drop = FALSE keeps a data frame even if only one column matches.
# NOTE(review): PDGP defines the response and stays in the exported files -
# confirm downstream modelling does not use it as a predictor.
data <- data[, colnames(data) %in% c(rownames(rf[["importance"]]), "PDGP"),
    drop = FALSE]
data$response <- as.factor(labels)

PD_data <- data[data$response == 1, ]
HC_data <- data[data$response == 0, ]

# Repeat with five different shuffles; each element is one list of folds.
all_splits <- replicate(5, create_groups(PD_data, HC_data),
    simplify = FALSE)
save(data, all_splits, file = "./rdata/var_reduct_HY_severe_HC_splits.RData")

# Write to csv files: fold i is the test set and the remaining folds are
# stacked into the training set.
out_dir <- "./files/HY_severe_HC/train_test_files"
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
for (split_num in seq_along(all_splits)) {
    folds <- all_splits[[split_num]]
    for (i in seq_along(folds)) {
        test <- folds[[i]]
        # One rbind over all training folds instead of growing a frame
        # fold by fold.
        train <- do.call(rbind, folds[-i])
        file_tag <- paste0("_split", split_num, "_iter", i, ".csv")
        write.csv(train, file.path(out_dir, paste0("train", file_tag)),
            row.names = FALSE)
        write.csv(test, file.path(out_dir, paste0("test", file_tag)),
            row.names = FALSE)
    }
}