# Re-initialise the random number generator as if no seed had been set, so
# every run of this script draws different splits.
set.seed(NULL)

# Bring the objects stored in the sensor-feature file into the workspace.
# NOTE(review): which objects this file provides is not visible here -- confirm.
load("../rdata/sensor_features_all_tasks.RData")

# NOTE(review): presumably the ids of participants with parkinsonism; the
# vector is not referenced in the visible part of this script -- confirm use.
parkinsonism_ids <- c(
  38, 168, 194, 199, 207, 212, 214, 223, 224, 235,
  241, 252, 259, 261, 263, 265, 268, 297, 298
)
# Create five splits stratified by group size
#
# Randomly partitions the rows of two data frames into five disjoint folds.
# Folds 1-4 each receive floor(nrow(group1) / 5) rows of `group1` and
# floor(nrow(group2) / 5) rows of `group2`, drawn without replacement. Fold 5
# receives every row that is left over, so it absorbs the remainder.
#
# Args:
#   group1: data frame holding the rows of the first class.
#   group2: data frame holding the rows of the second class; it must have the
#     same columns as `group1` so the two can be row-bound.
#
# Returns:
#   A list of five data frames. Each one stacks the fold's `group1` rows on
#   top of its `group2` rows.
create_groups <- function(group1, group2) {
  n_folds <- 5
  group1_size <- floor(nrow(group1) / n_folds)
  group2_size <- floor(nrow(group2) / n_folds)

  # Draw `size` random rows from `pool`; return the drawn rows and the rest.
  # `drop = FALSE` keeps single-column data frames from collapsing to vectors.
  draw_rows <- function(pool, size) {
    all_rows <- seq_len(nrow(pool))
    picked <- sample(all_rows, size = size, replace = FALSE)
    list(
      taken = pool[picked, , drop = FALSE],
      # setdiff() instead of a negative index: `pool[-integer(0), ]` would
      # select nothing when no rows were drawn.
      rest = pool[setdiff(all_rows, picked), , drop = FALSE]
    )
  }

  groups <- vector("list", n_folds)
  for (i in seq_len(n_folds - 1)) {
    # group1 is sampled before group2 in every iteration.
    draw1 <- draw_rows(group1, group1_size)
    group1 <- draw1$rest
    draw2 <- draw_rows(group2, group2_size)
    group2 <- draw2$rest
    groups[[i]] <- rbind(draw1$taken, draw2$taken)
  }

  # The last fold takes whatever has not been assigned yet.
  groups[[n_folds]] <- rbind(group1, group2)
  groups
}
# PD participants and controls
# Build repeated stratified five-fold splits for the table loaded from
# PD_control_seg.RData, save them, and write one train/test CSV pair per
# (shuffle, held-out fold).
# NOTE(review): `sensor_df` is presumably provided by this .RData file and
# `rf` by an earlier step -- confirm.
load("./rdata/PD_control_seg.RData")
data <- sensor_df

# Binary response: rows with PDGP > 600 are labelled 0, all other rows 1.
# NOTE(review): the 600 cut-off presumably separates control group codes from
# PD group codes -- confirm against the PDGP coding.
labels <- rep(1, nrow(data))
labels[data$PDGP > 600] <- 0

# Keep only the columns named in the rows of rf[["importance"]], plus PDGP.
# NOTE(review): PDGP stays in the table next to the response derived from it,
# so it also ends up in the written CSVs -- confirm downstream code drops it.
keep_cols <- colnames(data) %in% c(rownames(rf[["importance"]]), "PDGP")
data <- data[, keep_cols, drop = FALSE]
data$response <- as.factor(labels)

PD_data <- data[data$response == 1, ]
HC_data <- data[data$response == 0, ]

# Repeat the five-fold partition with five independent shuffles.
n_shuffles <- 5
all_splits <- vector("list", n_shuffles)
for (shuffle in seq_len(n_shuffles)) {
  all_splits[[shuffle]] <- create_groups(PD_data, HC_data)
}
save(data, all_splits, file = "./rdata/var_reduct_PD_control_splits.RData")

# Write to csv files
out_dir <- "./files/PD_HC/train_test_files"
# write.csv() does not create missing directories; make sure the target exists.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
for (split_num in seq_along(all_splits)) {
  folds <- all_splits[[split_num]]
  for (i in seq_along(folds)) {
    # Fold i is held out for testing; the remaining folds form the training
    # set, bound in one call instead of growing a data frame row block by
    # row block.
    test <- folds[[i]]
    train <- do.call(rbind, folds[-i])
    write.csv(
      train,
      file.path(out_dir, paste0("train_split", split_num, "_iter", i, ".csv")),
      row.names = FALSE
    )
    write.csv(
      test,
      file.path(out_dir, paste0("test_split", split_num, "_iter", i, ".csv")),
      row.names = FALSE
    )
  }
}
# Mild PD participants and controls
# Build repeated stratified five-fold splits for the table loaded from
# HY_control_early.RData, save them, and write one train/test CSV pair per
# (shuffle, held-out fold).
# NOTE(review): `sensor_df_early` is presumably provided by this .RData file
# and `rf` by an earlier step -- confirm.
load("./rdata/HY_control_early.RData")
data <- sensor_df_early

# Binary response: rows with PDGP > 600 are labelled 0, all other rows 1.
# NOTE(review): the 600 cut-off presumably separates control group codes from
# PD group codes -- confirm against the PDGP coding.
labels <- rep(1, nrow(data))
labels[data$PDGP > 600] <- 0

# Keep only the columns named in the rows of rf[["importance"]], plus PDGP.
# NOTE(review): PDGP stays in the table next to the response derived from it,
# so it also ends up in the written CSVs -- confirm downstream code drops it.
keep_cols <- colnames(data) %in% c(rownames(rf[["importance"]]), "PDGP")
data <- data[, keep_cols, drop = FALSE]
data$response <- as.factor(labels)

PD_data <- data[data$response == 1, ]
HC_data <- data[data$response == 0, ]

# Repeat the five-fold partition with five independent shuffles.
n_shuffles <- 5
all_splits <- vector("list", n_shuffles)
for (shuffle in seq_len(n_shuffles)) {
  all_splits[[shuffle]] <- create_groups(PD_data, HC_data)
}
save(data, all_splits, file = "./rdata/var_reduct_HY_early_HC_splits.RData")

# Write to csv files
out_dir <- "./files/HY_early_HC/train_test_files"
# write.csv() does not create missing directories; make sure the target exists.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
for (split_num in seq_along(all_splits)) {
  folds <- all_splits[[split_num]]
  for (i in seq_along(folds)) {
    # Fold i is held out for testing; the remaining folds form the training
    # set, bound in one call instead of growing a data frame row block by
    # row block.
    test <- folds[[i]]
    train <- do.call(rbind, folds[-i])
    write.csv(
      train,
      file.path(out_dir, paste0("train_split", split_num, "_iter", i, ".csv")),
      row.names = FALSE
    )
    write.csv(
      test,
      file.path(out_dir, paste0("test_split", split_num, "_iter", i, ".csv")),
      row.names = FALSE
    )
  }
}
# Moderate PD participants and controls
# Build repeated stratified five-fold splits for the table loaded from
# HY_control_mild.RData, save them, and write one train/test CSV pair per
# (shuffle, held-out fold).
# NOTE(review): `sensor_df_mild` is presumably provided by this .RData file
# and `rf` by an earlier step -- confirm.
load("./rdata/HY_control_mild.RData")
data <- sensor_df_mild

# Binary response: rows with PDGP > 600 are labelled 0, all other rows 1.
# NOTE(review): the 600 cut-off presumably separates control group codes from
# PD group codes -- confirm against the PDGP coding.
labels <- rep(1, nrow(data))
labels[data$PDGP > 600] <- 0

# Keep only the columns named in the rows of rf[["importance"]], plus PDGP.
# NOTE(review): PDGP stays in the table next to the response derived from it,
# so it also ends up in the written CSVs -- confirm downstream code drops it.
keep_cols <- colnames(data) %in% c(rownames(rf[["importance"]]), "PDGP")
data <- data[, keep_cols, drop = FALSE]
data$response <- as.factor(labels)

PD_data <- data[data$response == 1, ]
HC_data <- data[data$response == 0, ]

# Repeat the five-fold partition with five independent shuffles.
n_shuffles <- 5
all_splits <- vector("list", n_shuffles)
for (shuffle in seq_len(n_shuffles)) {
  all_splits[[shuffle]] <- create_groups(PD_data, HC_data)
}
save(data, all_splits, file = "./rdata/var_reduct_HY_mild_HC_splits.RData")

# Write to csv files
out_dir <- "./files/HY_mild_HC/train_test_files"
# write.csv() does not create missing directories; make sure the target exists.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
for (split_num in seq_along(all_splits)) {
  folds <- all_splits[[split_num]]
  for (i in seq_along(folds)) {
    # Fold i is held out for testing; the remaining folds form the training
    # set, bound in one call instead of growing a data frame row block by
    # row block.
    test <- folds[[i]]
    train <- do.call(rbind, folds[-i])
    write.csv(
      train,
      file.path(out_dir, paste0("train_split", split_num, "_iter", i, ".csv")),
      row.names = FALSE
    )
    write.csv(
      test,
      file.path(out_dir, paste0("test_split", split_num, "_iter", i, ".csv")),
      row.names = FALSE
    )
  }
}
# Severe PD participants and controls
# Build repeated stratified five-fold splits for the table loaded from
# HY_control_severe.RData, save them, and write one train/test CSV pair per
# (shuffle, held-out fold).
# NOTE(review): `sensor_df_severe` is presumably provided by this .RData file
# and `rf` by an earlier step -- confirm.
load("./rdata/HY_control_severe.RData")
data <- sensor_df_severe

# Binary response: rows with PDGP > 600 are labelled 0, all other rows 1.
# NOTE(review): the 600 cut-off presumably separates control group codes from
# PD group codes -- confirm against the PDGP coding.
labels <- rep(1, nrow(data))
labels[data$PDGP > 600] <- 0

# Keep only the columns named in the rows of rf[["importance"]], plus PDGP.
# NOTE(review): PDGP stays in the table next to the response derived from it,
# so it also ends up in the written CSVs -- confirm downstream code drops it.
keep_cols <- colnames(data) %in% c(rownames(rf[["importance"]]), "PDGP")
data <- data[, keep_cols, drop = FALSE]
data$response <- as.factor(labels)

PD_data <- data[data$response == 1, ]
HC_data <- data[data$response == 0, ]

# Repeat the five-fold partition with five independent shuffles.
n_shuffles <- 5
all_splits <- vector("list", n_shuffles)
for (shuffle in seq_len(n_shuffles)) {
  all_splits[[shuffle]] <- create_groups(PD_data, HC_data)
}
save(data, all_splits, file = "./rdata/var_reduct_HY_severe_HC_splits.RData")

# Write to csv files
out_dir <- "./files/HY_severe_HC/train_test_files"
# write.csv() does not create missing directories; make sure the target exists.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
for (split_num in seq_along(all_splits)) {
  folds <- all_splits[[split_num]]
  for (i in seq_along(folds)) {
    # Fold i is held out for testing; the remaining folds form the training
    # set, bound in one call instead of growing a data frame row block by
    # row block.
    test <- folds[[i]]
    train <- do.call(rbind, folds[-i])
    write.csv(
      train,
      file.path(out_dir, paste0("train_split", split_num, "_iter", i, ".csv")),
      row.names = FALSE
    )
    write.csv(
      test,
      file.path(out_dir, paste0("test_split", split_num, "_iter", i, ".csv")),
      row.names = FALSE
    )
  }
}