# EMIT Main Quarantine Paper Data Analysis # Program Objective: Using the authoritative, publishable dataset for the EMIT Quarantine study, # To produce the three study tables and the figure # Author: Jacob Bueno de Mesquita # Date: November 3-9, 2018; December, 2018; February, 2019 # Summary: This script produces the analysis for some of the results presented within the text that couldn't be easily produced within the EMIT_Quarantine_Main_Analysis.R script #### Load required packages, set working directory, and read in data file #### library(tidyverse) library(RcppRoll) library(readxl) library(knitr) library(data.table) library(lubridate) library(devtools) library(xtable) library(DT) library(kableExtra) # setwd("/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine") # Eliminating the setwd() in order to accomodate report compilation later. Qdata <- read.csv("/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Curated Data/Analytical Datasets/QuarantineMergedData.csv") session_info() #### * Producing Supplementary Information regarding donor status -- applying stringent criteria for symptomatic and ILI #### ## Applying more stringent criteria to main text Table 1 Symptomatic and ILI definitions ## These analysis have been written in the EMIT_Quarantine_Main_Analysis.R script, however were not used to produce Table 1 # However, c_sub2's were written that used the more stringent criteria (described in the code commentary) # The plan here is to copy over the code that was used to produe the main text Table 1 and use that #### Table 1 (donors): a) Number of infected, number of inoculated (and % infected of inoculated) #### # number of inoculated donors Qdata_inoculated_donors <- Qdata %>% filter(Randomization_DorIRorCR == "D") Qdata_inoculated_donors_table1 <- Qdata_inoculated_donors %>% group_by(QuarantineNumber) %>% summarize(Number_Inoculated_Donors = n_distinct(SubjectID)) print(Qdata_inoculated_donors_table1) # number of infected donors # positive by PCR (seroconversion, or PCR positive on more than 1 day) # let's get the list with at least one day PCR positive, then merge up with seroconversion data Qdata_pcr_pos1_or_more_days <- Qdata %>% filter(Randomization_DorIRorCR == "D") %>% filter(!is.na(InfA_Ct)) %>% filter((InfA_Ct<38 & InfA_Ct!=0)) %>% group_by(SubjectID, StudyDay) %>% summarize(count = n()) %>% summarize(NumberDaysPosPCR = n_distinct(StudyDay)) print(Qdata_pcr_pos1_or_more_days) # let's get the list with seroconversion by Microneuts (CDC serology) # First only select the subjectIDs that were serosusceptible by MN at baseline (<80 at baseline) # Revision, even if they were less serosusceptible, there was still a chance for seroconversion so do not filter out the <80 at baseline #Qdata_Microneut_susceptible <- Qdata %>% #filter(Randomization_DorIRorCR == "D" & Microneut_VisitType == "Q baseline" & Microneutralization.Titer.to.A.Wisconsin.67.2005 < 80) %>% #distinct(SubjectID, .keep_all = FALSE) Qdata_Microneut_pos <- Qdata %>% filter(Randomization_DorIRorCR == "D" & Microneut_VisitType == "F/up" & Microneut_Seroconvert == 1) %>% distinct(SubjectID, .keep_all = TRUE) %>% select(SubjectID, QuarantineNumber) print(Qdata_Microneut_pos) # let's get the list with seroconversion by HAI (Glasgow serology) # First only select the subjectIDs that were serosusceptible by HAI at baseline (<=10 at baseline) # Revision, even if they were less serosusceptible, there was still a chance for seroconversion so do not filter out the <=10 at baseline #Qdata_HAI_susceptible <- Qdata %>% #filter(Randomization_DorIRorCR == "D" & HAI_dayminus2_recodeNDA <= 10) %>% #distinct(SubjectID, .keep_all = FALSE) Qdata_HAI_pos <- Qdata %>% filter(Randomization_DorIRorCR == "D" & HAI_Seroconversion == 1) %>% distinct(SubjectID, .keep_all = TRUE) %>% select(SubjectID, QuarantineNumber) print(Qdata_HAI_pos) # Now let's merge the datasets together to get full list of volunteers who meet positivity criteria Qdata_infected <- Qdata_HAI_pos %>% full_join(Qdata_Microneut_pos, by = c("SubjectID" = "SubjectID")) %>% full_join(Qdata_pcr_pos1_or_more_days, by = c("SubjectID" = "SubjectID")) %>% arrange(SubjectID) print(Qdata_infected) # Identify whom among the pcr positive individuals with only a single day of PCR positivity did not also seroconvert to confirm infection Qdata_1pcrpos_nosero <- Qdata_infected %>% filter(NumberDaysPosPCR ==1) %>% filter(is.na(QuarantineNumber.x) & is.na(QuarantineNumber.y)) # Among the individuals that were PCR positive on only a single day, which study day was the positive day? Qdata_pcr_pos1_or_more_days_studydays <- Qdata %>% filter(Randomization_DorIRorCR == "D") %>% filter(!is.na(InfA_Ct)) %>% filter((InfA_Ct<38 & InfA_Ct!=0)) %>% group_by(SubjectID, StudyDay) %>% summarize(count = n()) Qdata_pcr_pos1_day <- Qdata_infected %>% filter(NumberDaysPosPCR ==1) %>% left_join(Qdata_pcr_pos1_or_more_days_studydays, by = c("SubjectID" = "SubjectID")) %>% select(-`count`) %>% rename(HAI_seroconversion = QuarantineNumber.x, MN_seroconversion = QuarantineNumber.y) %>% mutate(HAI_seroconversion = ifelse(HAI_seroconversion>=1, 1, HAI_seroconversion)) %>% mutate(MN_seroconversion = ifelse(MN_seroconversion>=1, 1, MN_seroconversion)) print(Qdata_pcr_pos1_day) # Note on this outcome: all subjects that were only PCR on one day, were positive on day 2 or day 4 (not day 1) # This may be useful if the group is interested in changing the criteria for PCR positivity such that a single PCR test would count as criteria for infection as long as it wasn't on day 0 or day 1. # Conversation with Alex Mann leads me to believe we should keep the 2 separate day PCR positivity criteria for now. # Remove those who were only 1 day pcr positive and no seroconversion (protocol criteria for positivity) Qdata_infected <- Qdata_infected %>% anti_join(Qdata_1pcrpos_nosero, by = c("SubjectID" = "SubjectID")) # Summarize number of infected (by any criteria) for each Q # First need to attach a full set of quarantine numbers on the "Qdata_infected" df Qdata_QuarantineNumbers <- Qdata %>% select(SubjectID, QuarantineNumber) %>% distinct(SubjectID, .keep_all = TRUE) %>% filter(!is.na(SubjectID)) Qdata_infected_donors <- Qdata_infected %>% left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) Qdata_infected_donors_table1 <- Qdata_infected_donors %>% group_by(QuarantineNumber) %>% summarize(Number_Infected_Donors = n_distinct(SubjectID)) print(Qdata_infected_donors_table1) #### Generation of Table 1 for paper #### # To output a nice summary table with numInfected, numInoculated, and %infected of inoculated Qdata_table1 <- Qdata_infected_donors_table1 %>% #using the df just created, above left_join(Qdata_inoculated_donors_table1, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_Infected_of_Inoculated = Number_Infected_Donors/Number_Inoculated_Donors) print(Qdata_table1) #### Table 1 (donors): b) Number of symptomatic (and % of infected) #### # How to define "symptomatic"? (Includes both "symptomatic non-ILI" and "symptomatic ILI") # Ben Killingley: "Symptomatic are the ones with symptoms (had to be more than 1 symptom on 1 day) but who did not reach ILI def" # Really, this means having evidence of 2 or more symptoms that occurred together over at least 2 consecutive days, or fever at least once # Figured out this classification criteria using information from Alex Mann and from reverse engineering the criteria from the CDC final report, which classified each volunteer (compared CDC final report with symptom profile of each volunteer) ## Note: The symptomatic, symptomatic non-ILI, ILI, and febrile are meant to be displayed as a fraction of the infected. However there are symptomatic, symtpomatic non-ILI, ILI, and perhaps even febrile who were not infected. Perhaps we should report these with symtpoms but who were not infected in their own supplementatry table? # Based on email correspondence with Don Milton and the EMIT team on October 3, 2018 we will forgo the "symptomatic" classification in the table # Thus, the next two sections of code (implementing versions 1 and 2) will not be further persued and published at this time. # Revision: Later email correspondence from October 4, 2018 resolves that we will use a milder criteria for symptomatic (and afebile). # Thus, Version 2 of Symptomatic will be used and is coded in this script somewhere below. #### ## Implementing Version 3 of "Symptomatic" #### ## The purpose of this version of symptomatic is so that we are consistent with the definitions from the proof-of-concept study (Killingley, 2012 JID) # Thus, this version 3 of symptomatic is: # "Any respiratory symptom that occurs at all over 2 consecutive days, or occurs for 3/3 (am, early pm, late pm) symptom measurements on a single day, where respiratory symptoms include: # runny nose, stuffy nose, sneezing, sore throat, cough, and shortness of breath" # First we are going to cut the new df that has only the 6 respiratory symptoms of interest # (and also to include fever, just in case of future analyses) Symptomatic_donors_infected_V3_days1to6 <- Qdata_infected_donors %>% left_join(Qdata) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% filter(Microneut_VisitType == "Q baseline") %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% distinct(SDC_time, .keep_all = TRUE) %>% arrange(SubjectID, StudyDay) %>% ungroup() # This is great but the way the data is put together, this leaves out the day -3 through day0 data # Therefore, as a quick fix, we will cut a new dataset that only filters in the data from day -3 through day0 # Then we will bind it back to the "Symptomatic_donors_infected_V3" that was just created. Symptomatic_donors_infected_before_day1 <- Qdata_infected_donors %>% left_join(Qdata, by = c("SubjectID" = "SubjectID", "QuarantineNumber" = "QuarantineNumber")) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0) %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) # Now binding together and sorting Symptomatic_donors_infected_V3_dayneg3to6 <- bind_rows(Symptomatic_donors_infected_V3_days1to6, Symptomatic_donors_infected_before_day1) Symptomatic_donors_infected_V3_dayneg3to6 <- Symptomatic_donors_infected_V3_dayneg3to6 %>% arrange(SubjectID, StudyDay) # Filter those with three measurements positive in a single study day for any of the respiratory pathogens sub <- unique(Symptomatic_donors_infected_V3_days1to6$SubjectID) c_sub <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- Symptomatic_donors_infected_V3_days1to6[Symptomatic_donors_infected_V3_days1to6$SubjectID == subid, ] temp1<-temp[,6:12] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { if (sum(temp2[,l]) == 3) { token <- 1 c_sub <- rbind(c_sub, subid) break } if (token == 1) { break } } if (token == 1) { break } } if (token == 1) { break } } } # But what if we want to disqualify a symptom from the classification scheme if it already appeared before study day 1 # We will compare results using this more stringent criteria, with the results using the more stringent criteria and select one (or both) for publication sub <- unique(Symptomatic_donors_infected_V3_dayneg3to6$SubjectID) c_sub2 <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- Symptomatic_donors_infected_V3_dayneg3to6[Symptomatic_donors_infected_V3_dayneg3to6$SubjectID == subid, ] temp1<-temp[,6:12] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { if (sum(temp2[,l]) == 3) { sum1<-0 for (m in 1:(tail(which(temp$StudyDay==0), n=1))) { sum1<-sum1+temp[m,l] } if (sum1==0) { token <- 1 c_sub2 <- rbind(c_sub2, subid) break } } } if (token == 1) { break } } if (token == 1) { break } } } # Note that this loop with the stringent criteria of not using symptoms that appear before day 1 in the classification of sympomatic_V3 yields c_sub2 with n=19 subjectIDs (5 fewer than in the less stringent loop above) # For now we will use the version with more stringent criteria but we will have the other version on deck in case we decide to change or publish both # Get get the list of subject IDs from c_sub2 into a df Symptomatic_V3_donors_infected_singleday <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") # Now use a loop to classify those with any sort of respiratory symptom on two consecutive days # For this we should use the "Symptomatic_donors_infected_grade123" df that marks with indicator of 1 when any of the 3 symptom measurements in a day showed evidence of symptoms of any grade. # First, manipulate the data to prepare for a loop that can classify symptomatic by using self report and DPE symptoms # Combine symptom severity measures (grades 1, 2, and 3) because grade >1 doesn't matter for this definition of symptomatic afebrile Symptomatic_donors_infected_grade123 <- Qdata_infected_donors %>% left_join(Qdata) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% summarize(Febrile = max(Febrile), runnyNose123 = max(runnyNose123), stuffyNose123 = max(stuffyNose123), sneezing123 = max(sneezing123), soreThroat123 = max(soreThroat123), DPENasalDischarge123 = max(DPENasalDischarge123), DPEOtits123 = max(DPEOtits123), DPESinusTenderness123 = max(DPESinusTenderness123), DPEPharyngitis123 = max(DPEPharyngitis123), cough123 = max(cough123), SOB123 = max(SOB123), headache123 = max(headache123), muscleAches123 = max(muscleAches123), malaise123 = max(malaise123)) %>% select(SubjectID, QuarantineNumber, StudyDay, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123, headache123, muscleAches123, malaise123) %>% ungroup() # The above gets us to a dataset where symptoms with grade 1, 2, or 3 are summarized by whether there was at least one symptoms (of any grade) detection per study day # Now to select which of the subjects were symptomatic (version1) (excluding the febrile criteria for now) # Note: Using the breaks in the loops for efficiency. If a subject is detected as symptomatic, the loop restarts on the next subjectID # But for this we only want to include symptoms for study days 1 to 6 so we need to cut a new df Symptomatic_donors_infected_grade123_day1to6 <- Symptomatic_donors_infected_grade123 %>% filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6) sub <- unique(Symptomatic_donors_infected_grade123_day1to6$SubjectID) c_sub <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- Symptomatic_donors_infected_grade123_day1to6[Symptomatic_donors_infected_grade123_day1to6$SubjectID == subid, ] temp1<-temp[,4:10] temp1[is.na(temp1)]<-0 temp<-cbind(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { if (temp[j, k] + temp[j+1, k] == 2){ if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { c_sub <- rbind(c_sub, subid) token<-1 break } } } if (token==1){ break } } } # This yields a c_sub of 32 subjectIDs but let's now apply the more stringent version (eliminating Sx if appear before day 1) sub <- unique(Symptomatic_donors_infected_grade123$SubjectID) c_sub2 <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- Symptomatic_donors_infected_grade123[Symptomatic_donors_infected_grade123$SubjectID == subid, ] temp1<-temp[,4:10] temp1[is.na(temp1)]<-0 temp<-cbind(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { if (temp[j, k] + temp[j+1, k] == 2){ if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { sum1<-0 for (l in 1:(tail(which(temp$StudyDay==0), n=1))) { sum1 <- sum1+temp[l,k] } if (sum1 == 0) { c_sub2 <- rbind(c_sub2, subid) token<-1 break } } } } if (token==1){ break } } } # This yields a c_sub2 of 30 subjectIDs, so it cut out 2 # For now, however we will stick to using the more stringent criteria with regards to Sx before day1 # Rename "V1" as SubjectID Symptomatic_V3_donors_infected_twodays <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") # Combine the Symptomatic_V3_donors_infected_singleday df and the Symptomatic_V3_donors_infected_twodays df Symptomatic_V3_donors_infected_combined <- Symptomatic_V3_donors_infected_twodays %>% full_join(Symptomatic_V3_donors_infected_singleday, by = c("SubjectID" = "SubjectID")) %>% arrange(SubjectID) # But the above definition of symptomatic (V3) doesn't make any mention of febrile illness # Let's check to see if the febrile are already accounted for among the group of symptomatic version 3 Symptomatic_by_fever <- Symptomatic_donors_infected_grade123 %>% filter(Febrile == 1) %>% select(SubjectID) %>% anti_join(Symptomatic_V3_donors_infected_combined, by = c("SubjectID" = "SubjectID")) # Important to note that 1 subjectID was febrile but not symptomatic (#148) # For now we will add to the table without this one febrile case, but we will inquire about whether or not to include febrile (without respiratory Sx) along with the other respiratory symptoms as part of the symptomatic V3 set Symptomatic_V3_donors_infected_combined_table1 <- Symptomatic_V3_donors_infected_combined %>% left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>% group_by(QuarantineNumber) %>% summarize(Number_Symptomatic_V3 = n_distinct(SubjectID)) # Add onto Table1 the number of symptomatic by version 3 criteria and % of infected Qdata_table1 <- Qdata_table1 %>% left_join(Symptomatic_V3_donors_infected_combined_table1, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_Symptomatic_V3_of_Infected = Number_Symptomatic_V3/Number_Infected_Donors) print(Qdata_table1) #### Table 1 (donors): c) Number of symptomatic, non-ILI (and % of infected) #### # We will ignore this category based on discussion above about removing "symptomatic" classification from the final report # Find note under "Table 1 (donors): b)..." section of code #### Table 1 (donors): d) Number of ILI (and % of infected) #### ## Definition of ILI by CDC: "Case definitions for influenza-like illness are nonspecific for influenza and vary depending on the purpose for which they are used A case definition of fever 100°F or greater, oral or equivalent, and cough and/or sore throat is used by CDC in its U.S." # We will create 2 versions of this definition: one including fever (as written above), and one not including fever # We will eliminate a symptom from contributing to classification criteria if it appeared during D-1 or D-2 # Later a third version was added based on teleconference on October 12, 2018 and the desire to be consistent with terminology following the previously published Killingley et al., 2012 paper #### ## Third version of classification of ILI (to match Killingley et al., 2012) (and % of infected) #### # This version came about during the October 12, 2018 conference call with the EMIT team (UK and UMD groups present) # For now this version sounds like it will be the one that we use for the paper. # This definition of ILI is: "an illness lasting >=24 hours with either (1) fever >37.9°C plus at least 1 respiratory symptom or (2) >=2 symptoms, at least 1 of which must be respiratory." # Where "respiratory symptom" means evidence of any grade of runny nose, stuffy nose, sneeze, sore throat, cough, shortness of breath # Where "lasting >=24 hours" means evidence of the symptom over all three instances of symptom measurements for a single day, or evidence of the symptom over two days at any frequency (1-3/3 instances of symptom recordings) # First, let's program the first criteria (fever >37.9C plus at least 1 respiratory symptom) # To do this, we can a) create the set of subject IDs that meet the fever criteria, and then check them for b) evidence of three instances during a single day, or c) evidence of any frequency of instances >=1 for 2 consecutive days # Then, we can deal with the second criteria for ILI (>=2 symptoms one of which being a respiratory) # Find the SubjectIDs from among the infected, that had fever # First check to see if anyone had fever before day 1. Qdata_infected_febrile_pre_day1 <- Qdata_infected_donors %>% left_join(Qdata) %>% filter(Randomization_DorIRorCR == "D") %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0) %>% filter(Tympanic.temp..degrees.C. >37.9) %>% distinct(SubjectID, .keep_all = FALSE) # As it turns out, none of the infected donors had fever before day 1 # Now we can see who among the infected subject IDs had fever at least once over study days 1-6 Qdata_infected_febrile_day1to6 <- Qdata_infected_donors %>% left_join(Qdata) %>% filter(Randomization_DorIRorCR == "D") %>% filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6) %>% filter(Tympanic.temp..degrees.C. >37.9) %>% distinct(SubjectID, .keep_all = FALSE) # Now see among the "Qdata_infected_febrile_day1to6" df, who had symptoms all day during a single study day # First we are going to cut the a new df that has the 9 symptoms of interest (this includes the 6 respiratory symptoms) and fever for only those in the "Qdata_infected_febrile_day1to6" df ILI_V3_donors_infected_febrile_studyday1to6 <- Qdata_infected_febrile_day1to6 %>% left_join(Qdata) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% filter(Microneut_VisitType == "Q baseline") %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123, headache123, muscleAches123, malaise123) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% distinct(SDC_time, .keep_all = TRUE) %>% arrange(SubjectID, StudyDay) %>% ungroup() # This is great but the way the data is put together, this leaves out the day -3 through day0 data # Therefore, as a quick fix, we will cut a new dataset that only filters in the data from day -3 through day0 # Then we will bind it back to the "Symptomatic_donors_infected_V3_febrile" that was just created. ILI_V3_donors_infected_before_day1_febrile <- Qdata_infected_febrile_day1to6 %>% left_join(Qdata) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123, headache123, muscleAches123, malaise123) # Now binding together and sorting ILI_V3_donors_infected_febrile <- bind_rows(ILI_V3_donors_infected_febrile_studyday1to6, ILI_V3_donors_infected_before_day1_febrile) ILI_V3_donors_infected_febrile <- ILI_V3_donors_infected_febrile %>% arrange(SubjectID, StudyDay) # But this definition is just for post day0 so we will filter just day1-6 so use the "ILI_V3_donors_infected_febrile_studyday1to6" df sub <- unique(ILI_V3_donors_infected_febrile_studyday1to6$SubjectID) c_sub <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- ILI_V3_donors_infected_febrile_studyday1to6[ILI_V3_donors_infected_febrile_studyday1to6$SubjectID == subid, ] temp1<-temp[,6:15] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { if (sum(temp2[,l]) == 3) { c_sub <- rbind(c_sub, subid) token <- 1 break } } if(token == 1) { break } } if(token == 1) { break } } } # This yields a c_sub vector with 7 subjectIDs # If we want to do the same loop, but exclude symptoms that were positive before study day 1 we use ILI_V3_donors_infected_febrile sub <- unique(ILI_V3_donors_infected_febrile$SubjectID) c_sub2 <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- ILI_V3_donors_infected_febrile[ILI_V3_donors_infected_febrile$SubjectID == subid, ] temp1<-temp[,6:12] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { if (sum(temp2[,l]) == 3) { sum1<-0 for (m in 1:(tail(which(temp$StudyDay==-0), n=1))){ sum1<-sum1+temp[m,l] } if (sum1==0) { c_sub2 <- rbind(c_sub2, subid) token<-1 break } } } if(token == 1) { break } } if(token == 1) { break } } } # This yields a c_sub2 vector with 7 subjectIDs, which matches what the less stringent loop from above produced # For now we will you the more stringent criteria # Now get the df of subject IDs from the c_sub2 vector ILI_V3_infected_donors_criteria1_singleday <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") # This is the output for the first criteria for ILI (fever and 1 respiratory symptom for >=24 hours), where the symptoms occured on three times in the same day (counts as >= 24 hours) # Moving to the second part of criteria 1 (febrile plus 2 consecutive study days of a resp. Sx at any freq >=1) # Now to implement the criteria 1 (febrile plus 1 resp Sx) for those who had symptoms over 2 consecutive study days # Use the "Symptomatic_donors_infected_grade123" df, cut it to the 6 resp. symptoms, and febrile only # Remember: the "Symptomatic_donors_infected_grade123" df created under Version 1 of Symptomatic for the infected donors Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days <- Symptomatic_donors_infected_grade123 %>% right_join(Qdata_infected_febrile_day1to6) %>% select(SubjectID, StudyDay, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) # Now implement loop to make this classification of having resp Sx at frequency >=1 over 2 consecutive study days # First without checking for symptoms prior to study day 1 Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days_1to6 <- Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days %>% filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6) sub <- unique(Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days_1to6$SubjectID) c_sub <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days_1to6[Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days_1to6$SubjectID == subid, ] temp1<-temp[,4:10] temp1[is.na(temp1)]<-0 temp<-cbind(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { if (temp[j, k] + temp[j+1, k] == 2){ if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { c_sub <- rbind(c_sub, subid) token<-1 break } } } if (token==1){ break } } } # This yielded a c_sub vector of 7 subjectIDs. # Check to see if any of the symptoms appeared before day 1 and thus should be considered as disqualifying for that symptom to contribute to classification criteria. sub <- unique(Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days$SubjectID) c_sub2 <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days[Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days$SubjectID == subid, ] temp1<-temp[,4:10] temp1[is.na(temp1)]<-0 temp<-cbind(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { if (temp[j, k] + temp[j+1, k] == 2){ if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { sum1<-0 for (m in 1:(tail(which(temp$StudyDay==0), n=1))){ sum1<-sum1+temp[m,k] } if (sum1==0) { c_sub2 <- rbind(c_sub2, subid) token<-1 break } } } } if (token==1){ break } } } # This also yielded a vector of 7 subjectIDs # Thus, using the more stringent criteria here doesn't make any difference # For now we will use the more stringent criteria # Create df from the vector of 7 subjectIDs from c_sub2 # Remember this is the classification of febrile with symptoms of >=1 frequency over 2 days ILI_V3_infected_donors_criteria1_2days <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") # Now will merge the 2 parts of criteria 1 for ILI together to get a single set of subjectIDs that meet the 1st ILI criteria # Reminder that 1st ILI criteria is: # febrile (>37.9C) plus >=24 hours of a respiratory symptom (one of the 6 self-reported resp Sxs) # The "2 parts of criteria 1" refer to: # 1) febrile plus 3 observations of self-reported resp. Sx in a single day, and # 2) febrile plus 2 consecutive days of at least 1 self-reported resp. Sx at any daily frequency >=1 ILI_V3_infected_donors_criteria1 <- full_join(ILI_V3_infected_donors_criteria1_singleday, ILI_V3_infected_donors_criteria1_2days) ## Plan for implementing the second criteria for ILI_V3: # That is: >= 2 symptoms for >=24 hours, 1 of which is respiratory, and merging with the first criteria for ILI # To do this, first we will filter those subject IDs without fever (find the always afebrile group) # Then we will see who among the always afebrile had: # >=1 resp. symptom on 1 day plus at least one other symptom on the same single day (grade >=1 for 3/3 observations in a day) # Then we will see who among the always afebrile had: # >=1 resp. sympom at frequency >=1 over 2 days plus at least one other resp. symptom at freq >=1 for same 2 days # Then we will add those subject IDs (from the above 2 criteria) together to form ILI_V3_infected_donors_criteria2 # Then we will merge ILI_V3_infected_donors_criteria1 and ILI_V3_infected_donors_criteria2 together to make ILI_V3_infected_donors # First we will get the list of subjectIDs who never had fever (the always afebrile group) # Now we can see who among the infected subject IDs had fever at least once over study days 1-6 Qdata_infected_afebrile_day1to6 <- Qdata_infected_donors %>% select (SubjectID) %>% anti_join(Qdata_infected_febrile_day1to6) # Now we will see who among these always afebrile had resp. symptom on a single day plus at least one other respiratory symptom on the same single day (grade >=1 for 3/3 observations in a day) # First we are going to cut the a new df that has the 6 self-reported respiratory symptoms of interest plus the 3 self-reported non-resp symptoms, and fever (just for kicks, it should always be <37.9 because we already filtered but interesting to have the values) # for only those in the "Qdata_infected_afebrile_day1to6" df ILI_V3_donors_infected_afebrile <- Qdata_infected_afebrile_day1to6 %>% left_join(Qdata) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% filter(Microneut_VisitType == "Q baseline") %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123, headache123, muscleAches123, malaise123) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% distinct(SDC_time, .keep_all = TRUE) %>% arrange(SubjectID, StudyDay) %>% ungroup() # This is great but the way the data is put together, this leaves out the day -3 through day0 data # Therefore, as a quick fix, we will cut a new dataset that only filters in the data from day -3 through day0 # Then we will bind it back to the "Symptomatic_V3_donors_infected_afebrile" that was just created. ILI_V3_donors_infected_before_day1_afebrile <- Qdata_infected_afebrile_day1to6 %>% left_join(Qdata) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) # Now binding together and sorting ILI_V3_donors_infected_afebrile <- bind_rows(ILI_V3_donors_infected_afebrile, ILI_V3_donors_infected_before_day1_afebrile) ILI_V3_donors_infected_afebrile <- ILI_V3_donors_infected_afebrile %>% arrange(SubjectID, StudyDay) # But, the current definition is just for post day0 so we will filter just day1-6 ILI_V3_donors_infected_afebrile_1to6 <- ILI_V3_donors_infected_afebrile %>% filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6) # Loop to get subjectIDs where there were 2 symptoms (one of which respiratory), each observed 3 times on the same day sub <- unique(ILI_V3_donors_infected_afebrile_1to6$SubjectID) c_sub <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- ILI_V3_donors_infected_afebrile_1to6[ILI_V3_donors_infected_afebrile_1to6$SubjectID == subid, ] temp1<-temp[,6:15] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { for (m in (l+1):15) { if (sum(temp2[,l]) + sum(temp2[,m]) == 6) { c_sub <- rbind(c_sub, subid) token <- 1 break } } if(token == 1) { break } } if(token == 1) { break } } if(token == 1) { break } } } # This prints a vector c_sub of 11 subjectIDs # If we want to do the same loop, but exclude symptoms that were positive before study day 1 we would do... # Use the "ILI_V3_donors_infected_afebrile" df because it has all three study days for day-3 to day6 for each always afebrile subjectID sub <- unique(ILI_V3_donors_infected_afebrile$SubjectID) c_sub2 <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- ILI_V3_donors_infected_afebrile[ILI_V3_donors_infected_afebrile$SubjectID == subid, ] temp1<-temp[,6:13] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 5:10) { for (m in (l+1):13) { if (sum(temp2[,l]) + sum(temp2[,m]) == 6) { sum1<-0 sum2<-0 for (n in 1:(tail(which(temp$StudyDay==0), n=1))){ sum1<-sum1+temp[n,l] sum2<-sum2+temp[n,m] } if (sum1==0 & sum2==0){ c_sub2 <- rbind(c_sub2, subid) token <- 1 break } } } if(token == 1) { break } } if(token == 1) { break } } if(token == 1) { break } } } # This yields a vector of 8 subjectIDs # For now we will take the more stringent criteria that does exclude Sx occuring before day1 # So this means we will stick to using c_sub2 and not c_sub # Now get c_sub2 vector of 11 subjectIDs into adf of subjectIDs ILI_V3_infected_donors_criteria2_2resp_singleday <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") # This "ILI_V3_infected_donors_criteria2_2resp_singleday" df is the output for the 1st part (single day) of the ILI_V3 2nd criteria. # Now for the 2nd part (2 consec days) of the ILI_V3 2nd criteria # Which is >=2 Sx with >=1 of those Sx being respiratory, and the >=2 Sx occuring over 2 consecutive days # Need to make a new df that has, for all of the always afebrile individuals, 1 row of data for each study day-3to6 # Recall: "Symptomatic_donors_infected_grade123" df was created in V1 of Sympomtatic and has all the Sx we need and spans study days-3 to 6, but we want to keep only the always afebrile ones ILI_V3_donors_infected_afebrile_1row_per_studyday_neg3to6 <- Symptomatic_donors_infected_grade123 %>% right_join(Qdata_infected_afebrile_day1to6) # Now, using the "ILI_V3_donors_infected_afebrile_1row_per_studyday_neg3to6" df # Columns 5-10 are respiratory symptoms and columns 11-13 are the other symptoms that matter for this definition # Do a loop to list subjectIDs for those with 2 or more symptoms, one of which is respiratory and where this occurred over 2 consecutive study days (at least 2 consecutive study days) # first, do not exclude symptoms that were positive before day 1 as part of the classification criteria # This means we must cut a new df with just data from study days 1-6 ILI_V3_donors_infected_afebrile_1row_per_studyday_day1to6 <- ILI_V3_donors_infected_afebrile_1row_per_studyday_neg3to6 %>% filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6) sub <- unique(ILI_V3_donors_infected_afebrile_1row_per_studyday_day1to6$SubjectID) c_sub <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- ILI_V3_donors_infected_afebrile_1row_per_studyday_day1to6[ILI_V3_donors_infected_afebrile_1row_per_studyday_day1to6$SubjectID == subid, ] temp1<-temp[,4:13] temp1[is.na(temp1)]<-0 temp<-cbind(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { for (l in (k+1):13){ if (temp[j, k] + temp[j, l] + temp[j+1, k] + temp[j+1, l] == 4) { if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { c_sub <- rbind(c_sub, subid) token<-1 break } } } if (token==1){ break } } if (token==1){ break } } } # This gives a vector c_sub of 18 subjectIDs # If we want to exclude symptoms that were positive before day 1 as part of the classification criteria, we use the following loop sub <- unique(ILI_V3_donors_infected_afebrile_1row_per_studyday_neg3to6$SubjectID) c_sub2 <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- ILI_V3_donors_infected_afebrile_1row_per_studyday_neg3to6[ILI_V3_donors_infected_afebrile_1row_per_studyday_neg3to6$SubjectID == subid, ] temp1<-temp[,4:13] temp1[is.na(temp1)]<-0 temp<-cbind(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { for (l in (k+1):13){ if (temp[j, k] + temp[j, l] + temp[j+1, k] + temp[j+1, l] == 4) { if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { sum1<-0 sum2<-0 for (m in 1:(tail(which(temp$StudyDay==-1), n=1))){ sum1<-sum1+temp[m,k] sum2<-sum1+temp[m,l] } if (sum1==0 & sum2==0) { c_sub2 <- rbind(c_sub2, subid) token<-1 break } } } } if (token==1){ break } } if (token==1){ break } } } # This yields 18 subjectIDs and doesn't change the result compared with the less stringent criteria. # Thus, we will keep the c_sub2 that is from the more stringent criteria # Get the c_sub2 vector of 18 subjectIDs into a dataframe ILI_V3_infected_donors_criteria2_part2 <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") # merge together the ILI criteria2 parts 1 and 2 ILI_V3_infected_donors_criteria2 <- full_join(ILI_V3_infected_donors_criteria2_2resp_singleday, ILI_V3_infected_donors_criteria2_part2) %>% arrange(SubjectID) # merge together the ILI criteria 1 and 2 dfs ILI_V3_infected_donors <- full_join(ILI_V3_infected_donors_criteria1, ILI_V3_infected_donors_criteria2) %>% arrange(SubjectID) # Now adding the QuarantineNumber on to the ILI df # Then we can sort by Q for the table1 ILI_V3_infected_donors_table1 <- ILI_V3_infected_donors %>% left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>% group_by(QuarantineNumber) %>% summarize(Number_ILI_V3 = n_distinct(SubjectID)) # Add onto Table1 the number of ILI by version 3 criteria and % of infected Qdata_table1 <- Qdata_table1 %>% left_join(ILI_V3_infected_donors_table1, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_ILI_V3_of_Infected = Number_ILI_V3/Number_Infected_Donors) print(Qdata_table1) #### Table 1 (donors): e) Number of febrile (and % of infected) #### # Use the list of infected donors to do this analysis Qdata_infected_febrile <- Qdata_infected_donors %>% left_join(Qdata) %>% filter(Randomization_DorIRorCR == "D" & Tympanic.temp..degrees.C. >37.9) Qdata_infected_febrile_table1 <- Qdata_infected_febrile %>% group_by(QuarantineNumber) %>% summarize(Number_Febrile_Infected = n_distinct(SubjectID)) # Add febrile count and fraction febrile to Table 1 Qdata_table1 <- Qdata_table1 %>% left_join(Qdata_infected_febrile_table1, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_Febrile_Infected_of_Total_Infected = Number_Febrile_Infected/Number_Infected_Donors) #### Table 1 (donors): f) Number of PCR confirmed infection (and % of infected) #### # This was already done to get the number of infected donors for the first few columns in this Table 1 # Redo what was done earlier, but tweaking for the purpose of this column in the table 1 # Get list of SubjectID and the number of days each was positive by PCR Qdata_pcr_pos2_or_more_days <- Qdata %>% filter(Randomization_DorIRorCR == "D") %>% filter(!is.na(InfA_Ct)) %>% filter((InfA_Ct<38 & InfA_Ct!=0)) %>% group_by(SubjectID, StudyDay) %>% summarize(count = n()) %>% summarize(NumberDaysPosPCR = n_distinct(StudyDay)) %>% filter(NumberDaysPosPCR >=2) print(Qdata_pcr_pos2_or_more_days) # Add the Q numbers to the list of SubjectIDs and the number of PCR positive days and summarize by Q Qdata_pcr_pos2_or_more_days_table1 <- Qdata_pcr_pos2_or_more_days %>% left_join(Qdata_QuarantineNumbers) %>% group_by(QuarantineNumber) %>% summarize(Number_PCR_Infected_Donors = n_distinct(SubjectID)) print(Qdata_pcr_pos2_or_more_days_table1) # To add to table1 Qdata_table1 <- Qdata_table1 %>% left_join(Qdata_pcr_pos2_or_more_days_table1, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_PCR_Infected_Donors_of_Infected = Number_PCR_Infected_Donors/Number_Infected_Donors) print(Qdata_table1) #### Table 1 (donors): g) Number of PCR confirmed infection and seroconversion (and % of infected) #### # This was already done to get the number of infected donors for the first few columns in this Table 1 # Work with the "Qdata_infected_donors" df that was created in section a) above. Inf_PCR_and_Sero <- Qdata_infected_donors %>% filter(NumberDaysPosPCR >=2) %>% filter(!is.na(QuarantineNumber.x) | !is.na(QuarantineNumber.y)) #Now summarize the number of unique SubjectIDs that meet this criteria by Q for the table Inf_PCR_and_Sero_table1 <- Inf_PCR_and_Sero %>% group_by(QuarantineNumber) %>% summarize(Number_Positive_PCR_and_Seroconversion = n_distinct(SubjectID)) # Now add Inf_PCR_and_Sero_table1 to the cumulative table 1 Qdata_table1 <- Qdata_table1 %>% left_join(Inf_PCR_and_Sero_table1, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_Infected_by_PCR_and_Serology = Number_Positive_PCR_and_Seroconversion/Number_Infected_Donors) #### Table 1 (donors): h) Number of seroconversion by HAI: MN: Either #### # This was already done to get the number of infected donors for the first few columns in this Table 1 # Reworking here to tailor the current Table 1 columns in question ## HAI # Qdata_HAI_pos is the list (generated in section a) above) with seroconversion by HAI (Glasgow serology) # Group by Q day and summarize number of distinct SubjectIDs Qdata_HAI_pos_table1 <- Qdata_HAI_pos %>% group_by(QuarantineNumber) %>% summarize(Number_HAI_Positive = n_distinct(SubjectID)) # Add HAI_pos column to the table (and % HAI_pos of infected) # For now will comment out the part that creates the percentage for this column Qdata_table1 <- Qdata_table1 %>% left_join(Qdata_HAI_pos_table1, by = c("QuarantineNumber" = "QuarantineNumber")) #%>% #mutate(Fraction_HAI_Positive_of_Infected = Number_HAI_Positive/Number_Infected_Donors) ## Microneuts # Qdata_Microneut_pos is the list (generated in section a) above) with seroconversion by Microneuts (CDC serology) # Group by Q day and summarize number of distinct SubjectIDs Qdata_Microneut_pos_table1 <- Qdata_Microneut_pos %>% group_by(QuarantineNumber) %>% summarize(Number_Microneut_Positive = n_distinct(SubjectID)) # Add Microneut_pos column to the table (and % Microneut_pos of infected) # For now will comment out the part that creates the percentage for this column Qdata_table1 <- Qdata_table1 %>% left_join(Qdata_Microneut_pos_table1, by = c("QuarantineNumber" = "QuarantineNumber")) #%>% #mutate(Fraction_MN_Positive_of_Infected = Number_Microneut_Positive/Number_Infected_Donors) ## Either HAI or MN # Already have this generated in the Qdata_infected_donors df Pos_Either_HAI_or_MN_table1 <- Qdata_infected_donors %>% filter(!is.na(QuarantineNumber.x) | !is.na(QuarantineNumber.y)) %>% group_by(QuarantineNumber) %>% summarize(Positive_By_Either_HAI_or_MN = n_distinct(SubjectID)) # Add Pos_Either_HAI_or_MN_table1 to the cumulative Qdata_table1 # For now will comment out the part that creates the percentage for this column Qdata_table1 <- Qdata_table1 %>% left_join(Pos_Either_HAI_or_MN_table1, by = c("QuarantineNumber" = "QuarantineNumber")) #%>% #mutate(Fraction_Positive_By_Either_HAI_or_MN = Positive_By_Either_HAI_or_MN/Number_Infected_Donors) #### Column means, sums, and rounding for Table 1 #### table1_manuscript <- Qdata_table1 # use this as a backup because it is arduous to recreate Qdata_table1 table1_manuscript[is.na(table1_manuscript)] <- 0 table1_manuscript_sums <- table1_manuscript %>% summarise_all(funs(sum)) table1_manuscript_sums <- table1_manuscript %>% full_join(table1_manuscript_sums) # This gets all of the columns to be summed # Now let's work on changing the sums in all of the "percent" columns to the proper fraction # (also perform the *100 transformation to percent) # (also round everything to the nearest whole number) table1_manuscript_sums_fractions <- table1_manuscript_sums %>% mutate(Fraction_Infected_of_Inoculated = (Number_Infected_Donors/Number_Inoculated_Donors)*100, Fraction_Symptomatic_V3_of_Infected = (Number_Symptomatic_V3/Number_Infected_Donors)*100, Fraction_ILI_V3_of_Infected = (Number_ILI_V3/Number_Infected_Donors)*100, Fraction_Febrile_Infected_of_Total_Infected = (Number_Febrile_Infected/Number_Infected_Donors)*100, Fraction_PCR_Infected_Donors_of_Infected = (Number_PCR_Infected_Donors/Number_Infected_Donors)*100, Fraction_Infected_by_PCR_and_Serology = (Number_Positive_PCR_and_Seroconversion/Number_Infected_Donors)*100) %>% mutate_all(funs(round(., 0))) # Now add parentheses to all of these fraction variables because they are presented in manuscript table as percents table1_manuscript_sums_fractions$Fraction_Infected_of_Inoculated <- paste0("(", table1_manuscript_sums_fractions$Fraction_Infected_of_Inoculated, ")") table1_manuscript_sums_fractions$Fraction_Symptomatic_V3_of_Infected <- paste0("(", table1_manuscript_sums_fractions$Fraction_Symptomatic_V3_of_Infected, ")") table1_manuscript_sums_fractions$Fraction_ILI_V3_of_Infected <- paste0("(", table1_manuscript_sums_fractions$Fraction_ILI_V3_of_Infected, ")") table1_manuscript_sums_fractions$Fraction_Febrile_Infected_of_Total_Infected <- paste0("(", table1_manuscript_sums_fractions$Fraction_Febrile_Infected_of_Total_Infected, ")") table1_manuscript_sums_fractions$Fraction_PCR_Infected_Donors_of_Infected <- paste0("(", table1_manuscript_sums_fractions$Fraction_PCR_Infected_Donors_of_Infected, ")") table1_manuscript_sums_fractions$Fraction_Infected_by_PCR_and_Serology <- paste0("(", table1_manuscript_sums_fractions$Fraction_Infected_by_PCR_and_Serology, ")") # Now bring columns together into more publishable arrangement of data in the display of the table # For example, when we have Infected/Inoculated column, we want to take the data from the Infected column and the data from the Inoculated column, and merge them into a single column, separated by a "/" SI_table1_manuscript <- table1_manuscript_sums_fractions %>% unite(`Infected/Inoculated`, Number_Infected_Donors, Number_Inoculated_Donors, sep = "/", remove = TRUE) %>% unite(`Infected/Inoculated (%)`, `Infected/Inoculated`, Fraction_Infected_of_Inoculated, sep = " ", remove = TRUE) %>% unite(Symptomatic, Number_Symptomatic_V3, Fraction_Symptomatic_V3_of_Infected, sep = " ", remove = TRUE) %>% unite(ILI, Number_ILI_V3, Fraction_ILI_V3_of_Infected, sep = " ", remove = TRUE) %>% unite(Febrile, Number_Febrile_Infected, Fraction_Febrile_Infected_of_Total_Infected, sep = " ", remove = TRUE) %>% unite(`PCR Confirmed Infection`, Number_PCR_Infected_Donors, Fraction_PCR_Infected_Donors_of_Infected, sep = " ", remove = TRUE) %>% unite(`PCR Confirmed Infection and Seroconversion`, Number_Positive_PCR_and_Seroconversion, Fraction_Infected_by_PCR_and_Serology, sep = " ", remove = TRUE) %>% unite(`Seroconversion by HAI : MN : Either`, Number_HAI_Positive, Number_Microneut_Positive, Positive_By_Either_HAI_or_MN, sep = " : ", remove = TRUE) %>% rename('Quarantine #' = QuarantineNumber) SI_table1_manuscript <- SI_table1_manuscript[,c(1:3,5,4,6:8)] # Change the 4th row of the Quarantine # column to "Total" SI_table1_manuscript$`Quarantine #`[4] <- "Total" #### Writing out Table 1 to box sync directory #### write.csv(SI_table1_manuscript, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_prior_sympt_Table1_Manuscript.csv") #### Writing out Table 1 to latex for direct translation of code to table image for paper #### kable(SI_table1_manuscript) %>% kable_styling("striped") %>% add_header_above(c(" " = 2, "Laboratory Confirmed Infection & Illness (% of Infected)" = 3, "Laboratory Confirmed Infection Criteria (% of Infected)" = 3)) datatable(SI_table1_manuscript, filter = 'top', options = list( pageLength = 10, autoWidth = TRUE )) print(xtable(SI_table1_manuscript), comment = FALSE) #### Result: the more stringent criteria reduced Sx by 2 SIDs #### #### * Producing Supplementary Information regarding IR and CR status -- applying stringent criteria for symptomatic and ILI #### ## Applying more stringent criteria to main text Table 1 Symptomatic and ILI definitions ## These analysis have been written in the EMIT_Quarantine_Main_Analysis.R script, however were not used to produce Table 3 # However, c_sub2's were written that used the more stringent criteria (described in the code commentary) # The plan here is to copy over the code that was used to produe the main text Table 3 and use that #### Table 3: a1) IR: Number of infected/ number of exposed (and %) #### # number of exposed IR Exposed_IR <- Qdata %>% filter(Randomization_DorIRorCR == "IR") %>% distinct(SubjectID, .keep_all = TRUE) Exposed_IR_table3 <- Exposed_IR %>% group_by(QuarantineNumber) %>% summarize(NumberExposedIR = n_distinct(SubjectID)) print(Exposed_IR_table3) # number of infected IR # positive by PCR (seroconversion, or PCR positive on more than 1 day) # let's get the list with at least one day PCR positive, then merge up with seroconversion data Qdata_pcr_pos1_or_more_days_IR <- Qdata %>% filter(Randomization_DorIRorCR == "IR") %>% filter(!is.na(InfA_Ct)) %>% filter((InfA_Ct<38 & InfA_Ct!=0)) %>% group_by(SubjectID, StudyDay) %>% summarize(count = n()) %>% summarize(NumberDaysPosPCR_IR = n_distinct(StudyDay)) print(Qdata_pcr_pos1_or_more_days_IR) # let's get the list with seroconversion by Microneuts (CDC serology) # First only select the subjectIDs that were serosusceptible by MN at baseline (<80 at baseline) # Upon the October 12, 2018 conference call with the team, decided to not exclude based on this criteria #Qdata_Microneut_susceptible <- Qdata %>% #filter(Randomization_DorIRorCR == "IR" & Microneut_VisitType == "Q baseline" & Microneutralization.Titer.to.A.Wisconsin.67.2005 < 80) %>% #distinct(SubjectID, .keep_all = FALSE) Qdata_Microneut_pos_IR <- Qdata %>% filter(Randomization_DorIRorCR == "IR" & Microneut_VisitType == "F/up" & Microneut_Seroconvert == 1) %>% distinct(SubjectID, .keep_all = TRUE) %>% select(SubjectID, QuarantineNumber) print(Qdata_Microneut_pos_IR) # let's get the list with seroconversion by HAI (Glasgow serology) # First only select the subjectIDs that were serosusceptible by HAI at baseline (<=10 at baseline) # Following the October 12, 2018 conference call with the team, decided to not exclude based on this criteria #Qdata_HAI_susceptible <- Qdata %>% #filter(Randomization_DorIRorCR == "IR" & HAI_dayminus2_recodeNDA <= 10) %>% #distinct(SubjectID, .keep_all = FALSE) Qdata_HAI_pos_IR <- Qdata %>% filter(Randomization_DorIRorCR == "IR" & HAI_Seroconversion == 1) %>% distinct(SubjectID, .keep_all = TRUE) %>% select(SubjectID, QuarantineNumber) print(Qdata_HAI_pos_IR) # Now let's merge the datasets together to get full list of volunteers who meet positivity criteria Qdata_infected_IR <- Qdata_HAI_pos_IR %>% full_join(Qdata_Microneut_pos_IR, by = c("SubjectID" = "SubjectID")) %>% full_join(Qdata_pcr_pos1_or_more_days_IR, by = c("SubjectID" = "SubjectID")) %>% arrange(SubjectID) print(Qdata_infected_IR) # Identify whom among the pcr positive individuals with only a single day of PCR positivity did not also seroconvert to confirm infection Qdata_1pcrpos_nosero_IR <- Qdata_infected_IR %>% filter(NumberDaysPosPCR_IR ==1) %>% filter(is.na(QuarantineNumber.x) & is.na(QuarantineNumber.y)) # Among the individuals that were positive on only a single day, which study day was the positive day? Qdata_pcr_pos1_or_more_days_studydays_IR <- Qdata %>% filter(Randomization_DorIRorCR == "IR") %>% filter(!is.na(InfA_Ct)) %>% filter((InfA_Ct<38 & InfA_Ct!=0)) %>% group_by(SubjectID, StudyDay) %>% summarize(count = n()) Qdata_pcr_pos1_day_IR <- Qdata_infected_IR %>% filter(NumberDaysPosPCR_IR ==1) %>% left_join(Qdata_pcr_pos1_or_more_days_studydays_IR, by = c("SubjectID" = "SubjectID")) %>% select(-`count`) # Remove those who were only 1 day pcr positive and no seroconversion (protocol criteria for positivity) Qdata_infected_IR <- Qdata_infected_IR %>% anti_join(Qdata_1pcrpos_nosero_IR, by = c("SubjectID" = "SubjectID")) # Summarize number of infected (by any criteria) for each Q # First need to attach a full set of quarantine numbers on the "Qdata_infected" df Qdata_QuarantineNumbers <- Qdata %>% select(SubjectID, QuarantineNumber) %>% distinct(SubjectID, .keep_all = TRUE) %>% filter(!is.na(SubjectID)) Qdata_infected_IR <- Qdata_infected_IR %>% left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) Qdata_infected_IR_table3 <- Qdata_infected_IR %>% group_by(QuarantineNumber) %>% summarize(NumberInfectedIR = n_distinct(SubjectID)) print(Qdata_infected_IR_table3) #### Generation of Table3_IR for paper #### # To output a nice summary table with numInfected, numInoculated, and %infected of inoculated Qdata_table3_IR <- Exposed_IR_table3 %>% left_join(Qdata_infected_IR_table3) %>% mutate(Fraction_Inf_over_ExpIR = NumberInfectedIR/NumberExposedIR) print(Qdata_table3_IR) #### Table 3: a2) CR: Number of infected/ number of exposed (and %) #### # number of exposed CR Exposed_CR <- Qdata %>% filter(Randomization_DorIRorCR == "CR") %>% distinct(SubjectID, .keep_all = TRUE) Exposed_CR_table3 <- Exposed_CR %>% group_by(QuarantineNumber) %>% summarize(NumberExposedCR = n_distinct(SubjectID)) print(Exposed_CR_table3) # number of infected CR # positive by PCR (seroconversion, or PCR positive on more than 1 day) # let's get the list with at least one day PCR positive, then merge up with seroconversion data Qdata_pcr_pos1_or_more_days_CR <- Qdata %>% filter(Randomization_DorIRorCR == "CR") %>% filter(!is.na(InfA_Ct)) %>% filter((InfA_Ct<38 & InfA_Ct!=0)) %>% group_by(SubjectID, StudyDay) %>% summarize(count = n()) %>% summarize(NumberDaysPosPCR_CR = n_distinct(StudyDay)) print(Qdata_pcr_pos1_or_more_days_CR) # let's get the list with seroconversion by Microneuts (CDC serology) # First only select the subjectIDs that were serosusceptible by MN at baseline (<80 at baseline) # Upon the October 12, 2018 conference call with the team, decided to not exclude based on this criteria #Qdata_Microneut_susceptible <- Qdata %>% #filter(Randomization_DorIRorCR == "CR" & Microneut_VisitType == "Q baseline" & Microneutralization.Titer.to.A.Wisconsin.67.2005 < 80) %>% #distinct(SubjectID, .keep_all = FALSE) Qdata_Microneut_pos_CR <- Qdata %>% filter(Randomization_DorIRorCR == "CR" & Microneut_VisitType == "F/up" & Microneut_Seroconvert == 1) %>% distinct(SubjectID, .keep_all = TRUE) %>% select(SubjectID, QuarantineNumber) print(Qdata_Microneut_pos_CR) # let's get the list with seroconversion by HAI (Glasgow serology) # First only select the subjectIDs that were serosusceptible by HAI at baseline (<=10 at baseline) # Upon the October 12, 2018 conference call with the team, decided to not exclude based on this criteria #Qdata_HAI_susceptible <- Qdata %>% #filter(Randomization_DorIRorCR == "CR" & HAI_dayminus2_recodeNDA <= 10) %>% #distinct(SubjectID, .keep_all = FALSE) Qdata_HAI_pos_CR <- Qdata %>% filter(Randomization_DorIRorCR == "CR" & HAI_Seroconversion == 1) %>% distinct(SubjectID, .keep_all = TRUE) %>% select(SubjectID, QuarantineNumber) print(Qdata_HAI_pos_CR) # Now let's merge the datasets together to get full list of volunteers who meet positivity criteria Qdata_infected_CR <- Qdata_HAI_pos_CR %>% full_join(Qdata_Microneut_pos_CR, by = c("SubjectID" = "SubjectID")) %>% full_join(Qdata_pcr_pos1_or_more_days_CR, by = c("SubjectID" = "SubjectID")) %>% arrange(SubjectID) print(Qdata_infected_CR) # Identify whom among the pcr positive individuals with only a single day of PCR positivity did not also seroconvert to confirm infection Qdata_1pcrpos_nosero_CR <- Qdata_infected_CR %>% filter(NumberDaysPosPCR_CR ==1) %>% filter(is.na(QuarantineNumber.x) & is.na(QuarantineNumber.y)) # Among the individuals that were positive on only a single day, which study day was the positive day? Qdata_pcr_pos1_or_more_days_studydays_CR <- Qdata %>% filter(Randomization_DorIRorCR == "CR") %>% filter(!is.na(InfA_Ct)) %>% filter((InfA_Ct<38 & InfA_Ct!=0)) %>% group_by(SubjectID, StudyDay) %>% summarize(count = n()) Qdata_pcr_pos1_day_CR <- Qdata_infected_CR %>% filter(NumberDaysPosPCR_CR ==1) %>% left_join(Qdata_pcr_pos1_or_more_days_studydays_CR, by = c("SubjectID" = "SubjectID")) %>% select(-`count`) # Remove those who were only 1 day pcr positive and no seroconversion (protocol criteria for positivity) Qdata_infected_CR <- Qdata_infected_CR %>% anti_join(Qdata_1pcrpos_nosero_CR, by = c("SubjectID" = "SubjectID")) # Summarize number of infected (by any criteria) for each Q # First need to attach a full set of quarantine numbers on the "Qdata_infected_CR" df Qdata_QuarantineNumbers <- Qdata %>% select(SubjectID, QuarantineNumber) %>% distinct(SubjectID, .keep_all = TRUE) %>% filter(!is.na(SubjectID)) Qdata_infected_CR <- Qdata_infected_CR %>% left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) Qdata_infected_CR_table3 <- Qdata_infected_CR %>% group_by(QuarantineNumber) %>% summarize(NumberInfectedCR = n_distinct(SubjectID)) print(Qdata_infected_CR_table3) #### Generation of Table3_CR for paper #### # To output a nice summary table with numInfected, numInoculated, and %infected of inoculated Qdata_table3_CR <-Exposed_CR_table3 %>% #using the df just created, above left_join(Qdata_infected_CR_table3) %>% mutate(Fraction_Inf_over_ExpCR = NumberInfectedCR/NumberExposedCR) print(Qdata_table3_CR) #### Table 3: IR Symptomatic version 3 (to match Killingley, 2012) #### ## Implementing a new version of "symptomatic" based on October 12, 2018 webex conference with the team ## The purpose of this version of symptomatic is so that we are consistent with the definitions from the proof-of-concept study (Killingley, 2012 JID) # Thus, this version 3 of symptomatic for IR is: # "Any respiratory symptom that occurs at all over 2 consecutive days, or occurs for 3/3 (am, early pm, late pm) symptom measurements on a single day, where respiratory symptoms include runny nose, stuffy nose, sneezing, sore throat, cough, and shortness of breath" # First we are going to cut the a new df that has only the 6 respiratory symtpomms of interest # (and also to include fever, just in case of future analyses) Symptomatic_IR_V3_day1to10 <- Qdata %>% filter(Randomization_DorIRorCR == "IR") %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% filter(Microneut_VisitType == "Q baseline") %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% distinct(SDC_time, .keep_all = TRUE) %>% arrange(SubjectID, StudyDay) %>% ungroup() # This is great but the way the data is put together, this leaves out the day -3 through day0 data # Therefore, as a quick fix, we will cut a new dataset that only filters in the data from day -3 through day0 # Then we will bind it back to the "Symptomatic_IR_V3" that was just created. Symptomatic_IR_V3_before_day1 <- Exposed_IR %>% select(SubjectID) %>% left_join(Qdata) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% distinct(SDC_time, .keep_all = TRUE) %>% arrange(SubjectID, StudyDay) %>% ungroup() # Now binding together and sorting Symptomatic_IR_V3 <- bind_rows(Symptomatic_IR_V3_day1to10, Symptomatic_IR_V3_before_day1) Symptomatic_IR_V3 <- Symptomatic_IR_V3 %>% arrange(SubjectID, StudyDay) # We will hold onto the above work for the future, but for now use df Symptomatic_IR_V3_day1to10 # Filter those with three measurements positive in a single study day for any of the respiratory pathogens sub <- unique(Symptomatic_IR_V3_day1to10$SubjectID) c_sub <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- Symptomatic_IR_V3_day1to10[Symptomatic_IR_V3_day1to10$SubjectID == subid, ] temp1<-temp[,6:12] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { if (sum(temp2[,l]) == 3) { c_sub <- rbind(c_sub, subid) token <- 1 break } } if (token == 1) { break } } if (token == 1) { break } } } # This yields a c_sub vector of 5 subjectIDs # However if we wanted to ignore the symptoms from the criteria where there was observation before day 1, we would do a new loop # Note that here we must use the "Symptomatic_IR_V3" df because it includes the data from before day1 sub <- unique(Symptomatic_IR_V3$SubjectID) c_sub2 <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- Symptomatic_IR_V3[Symptomatic_IR_V3$SubjectID == subid, ] temp1<-temp[,6:12] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { if (sum(temp2[,l]) == 3) { sum1<-0 for (m in 1:(tail(which(temp$StudyDay==0), n=1))) { sum1<-sum1+temp[m,l] } if (sum1==0) { token<-1 c_sub2 <- rbind(c_sub2, subid) break } } } if (token == 1) { break } } if (token == 1) { break } } } # This yields a c_sub2 vector with 3 subjectIDs (2 less than the c_sub) and implements the criteria where we eliminated the Sx appearing before day1 # But for now we will use the more stringent criteria # Now get the list of subject IDs from c_sub2 (as opposed to the c_sub version) Symptomatic_IR_singleday <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") %>% distinct(SubjectID) # Now use a loop to classify those with at least 1 respiratory symptom on two consec days # For this we should use the "Symptomatic_IR_exposed_grade123" df that marks with indicator of 1 when any of the 3 symptom measurements in a day showed evidence of symptoms of any grade. # This df was created in the first version of symptomatic for IR # First need to manipulate the dataset to prepare for the loop logic that was created to do this analysis # Combine symptom severity measures (grades 1, 2, and 3) because grade >1 doesn't matter for this definition of symptomatic afebrile Symptomatic_IR_exposed_grade123 <- Exposed_IR %>% select(SubjectID, QuarantineNumber) %>% left_join(Qdata, by = c("SubjectID" = "SubjectID", "QuarantineNumber" = "QuarantineNumber")) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% summarize(Febrile = max(Febrile), runnyNose123 = max(runnyNose123), stuffyNose123 = max(stuffyNose123), sneezing123 = max(sneezing123), soreThroat123 = max(soreThroat123), DPENasalDischarge123 = max(DPENasalDischarge123), DPEOtits123 = max(DPEOtits123), DPESinusTenderness123 = max(DPESinusTenderness123), DPEPharyngitis123 = max(DPEPharyngitis123), cough123 = max(cough123), SOB123 = max(SOB123), headache123 = max(headache123), muscleAches123 = max(muscleAches123), malaise123 = max(malaise123)) %>% ungroup() # The above gets us to a dataset where symptoms with grade 1, 2, or 3 are summarized by whether there was at least one symptoms (of any grade) detection per study day # Now can search for recipients that meet the definition of "symptomatic afebrile" # However we will deal with the "afebrile" component later # First applying the loop that will select those that meet the symptoms criteria apart from afebrile # But first need to create a new df that only has data from study days 1-10 Symptomatic_IR_exposed_grade123_day1to10 <- Symptomatic_IR_exposed_grade123 %>% filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) # First cut the the "Symptomatic_IR_exposed_grade123" df to the variables of interest and proper scale for the loop Symptomatic_IR_exposed_grade123_resp <-Symptomatic_IR_exposed_grade123 %>% select(SubjectID, StudyDay, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) # First we need to get the df for just study days 1-10 Symptomatic_IR_exposed_grade123_resp_day1to10 <- Symptomatic_IR_exposed_grade123_resp %>% filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) sub <- unique(Symptomatic_IR_exposed_grade123_resp_day1to10$SubjectID) c_sub <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- Symptomatic_IR_exposed_grade123_resp_day1to10[Symptomatic_IR_exposed_grade123_resp_day1to10$SubjectID == subid, ] temp1<-temp[,4:10] temp1[is.na(temp1)]<-0 temp<-cbind(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { if (temp[j, k] + temp[j+1, k] == 2){ if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { c_sub <- rbind(c_sub, subid) token<-1 break } } } if (token==1){ break } } } # This yields a c_sub vector of 11 subjectIDs # If we use the more stringent criteria we use the below loop instead and get c_sub2 sub <- unique(Symptomatic_IR_exposed_grade123_resp$SubjectID) c_sub2 <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- Symptomatic_IR_exposed_grade123_resp[Symptomatic_IR_exposed_grade123_resp$SubjectID == subid, ] temp1<-temp[,4:10] temp1[is.na(temp1)]<-0 temp<-cbind(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { if (temp[j, k] + temp[j+1, k] == 2){ if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { sum1<-0 for (m in 1:(tail(which(temp$StudyDay==0), n=1))) { sum1<-sum1+temp[m,k] } if (sum1==0) { token <- 1 c_sub2 <- rbind(c_sub2, subid) break } c_sub2 <- rbind(c_sub2, subid) token<-1 break } } } if (token==1){ break } } } # This yields a c_sub2 of 11 subjectIDs (same as c_sub) # For now we will go with the c_sub2 # Rename "V1" as SubjectID using the less stringent c_sub2 Symptomatic_IR_twodays <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") %>% distinct(SubjectID) # Combine the Symptomatic_donors_infected_singleday df and the Symptomatic_donors_infected_twodays df Symptomatic_IR_V3_combined <- Symptomatic_IR_twodays %>% full_join(Symptomatic_IR_singleday) %>% arrange(SubjectID) # But the above definition of symptomatic doesn't make any mention of febrile illness # Let's check to see if the febrile are already accounted for among the group of symptomatic version 3 Symptomatic_by_fever_IR <- Symptomatic_IR_exposed_grade123 %>% filter(Febrile == 1) %>% select(SubjectID) %>% anti_join(Symptomatic_IR_V3_combined, by = c("SubjectID" = "SubjectID")) # Returned 0 subject IDs, thus adding fever to the analysis doesn't add anything. However we should still be clear about definitions for the paper Symptomatic_IR_V3_combined <- Symptomatic_IR_V3_combined %>% left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>% group_by(QuarantineNumber) %>% summarize(Number_IR_Symptomatic_V3 = n_distinct(SubjectID)) # Add onto Table3_IR the number of symptomatic by version 3 criteria and % of infected Qdata_table3_IR <- Qdata_table3_IR %>% left_join(Symptomatic_IR_V3_combined, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_IR_Symptomatic_V3_of_ExposedIR = Number_IR_Symptomatic_V3/NumberExposedIR) print(Qdata_table3_IR) #### Table 3: CR Symptomatic version 3 (to match Killingley, 2012) #### ## The purpose of this version of symptomatic is so that we are consistent with the definitions from the proof-of-concept study (Killingley, 2012 JID) # Thus, this version 3 of symptomatic for CR is: # "Any respiratory symptom that occurs at all over 2 consecutive days, or occurs for 3/3 (am, early pm, late pm) symptom measurements on a single day, where respiratory symptoms include runny nose, stuffy nose, sneezing, sore throat, cough, and shortness of breath" # First we are going to cut the a new df that has only the 6 respiratory symtpomms of interest # (and also to include fever, just in case of future analyses) Symptomatic_CR_V3_day1to10 <- Qdata %>% filter(Randomization_DorIRorCR == "CR") %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% filter(Microneut_VisitType == "Q baseline") %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% distinct(SDC_time, .keep_all = TRUE) %>% arrange(SubjectID, StudyDay) %>% ungroup() # This is great but the way the data is put together, this leaves out the day -3 through day0 data # Therefore, as a quick fix, we will cut a new dataset that only filters in the data from day -3 through day0 # Then we will bind it back to the "Symptomatic_CR_V3" that was just created. Symptomatic_CR_before_day1 <- Exposed_CR %>% select(SubjectID) %>% left_join(Qdata) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% distinct(SDC_time, .keep_all = TRUE) %>% arrange(SubjectID, StudyDay) %>% ungroup() # Now binding together and sorting Symptomatic_CR_V3 <- rbind(Symptomatic_CR_V3_day1to10, Symptomatic_CR_before_day1) Symptomatic_CR_V3 <- Symptomatic_CR_V3 %>% arrange(SubjectID, StudyDay) # We will hold onto the above work for the future, but for now use the Symptomatic_CR_V3_day1to10 df # Filter those with three measurements positive in a single study day for any of the respiratory pathogens sub <- unique(Symptomatic_CR_V3_day1to10$SubjectID) c_sub <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- Symptomatic_CR_V3_day1to10[Symptomatic_CR_V3_day1to10$SubjectID == subid, ] temp1<-temp[,6:12] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { if (sum(temp2[,l]) == 3) { token <-1 c_sub <- rbind(c_sub, subid) break } } if (token == 1) { break } } if (token == 1) { break } } } # This yields a c_sub vector with 8 subjectIDs, however if we want to employ the stringent criteria where Sx are removed from the classification criteria if they appear before day1, then use the next loop. sub <- unique(Symptomatic_CR_V3$SubjectID) c_sub2 <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- Symptomatic_CR_V3[Symptomatic_CR_V3$SubjectID == subid, ] temp1<-temp[,6:12] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { if (sum(temp2[,l]) == 3) { sum1<-0 for (m in 1:(tail(which(temp$StudyDay==0), n=1))) { sum1<-sum1+temp[m,l] } if (sum1==0) { token <- 1 c_sub2 <- rbind(c_sub2, subid) break } } } if (token == 1) { break } } if (token == 1) { break } } } # This yields a c_sub2 for 6 subjectIDs (2 fewer than the c_sub) # For now we will go with the more stringent criteria and use the c_sub2 # Now get the list of subject IDs from c_sub2 Symptomatic_V3_CR_singleday <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") %>% distinct(SubjectID) # Now use a loop to classify those with any sort of respiratory symptom on two consecutive days # For this we should use the "Symptomatic_CR_exposed_grade123" df that marks with indicator of 1 when any of the 3 symptom measurements in a day showed evidence of symptoms of any grade. # First need to manipulate the dataset to prepare for the loop logic that was created to do this analysis # Combine symptom severity measures (grades 1, 2, and 3) because grade >1 doesn't matter for this definition of symptomatic afebrile Symptomatic_CR_exposed_grade123 <- Exposed_CR %>% select(SubjectID, QuarantineNumber) %>% left_join(Qdata, by = c("SubjectID" = "SubjectID", "QuarantineNumber" = "QuarantineNumber")) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% summarize(Febrile = max(Febrile), runnyNose123 = max(runnyNose123), stuffyNose123 = max(stuffyNose123), sneezing123 = max(sneezing123), soreThroat123 = max(soreThroat123), DPENasalDischarge123 = max(DPENasalDischarge123), DPEOtits123 = max(DPEOtits123), DPESinusTenderness123 = max(DPESinusTenderness123), DPEPharyngitis123 = max(DPEPharyngitis123), cough123 = max(cough123), SOB123 = max(SOB123), headache123 = max(headache123), muscleAches123 = max(muscleAches123), malaise123 = max(malaise123)) %>% ungroup() # The above gets us to a dataset where symptoms with grade 1, 2, or 3 are summarized by whether there was at least one symptoms (of any grade) detection per study day # Now can implement criteria search for recipients that meet the definition of "symptomatic afebrile" # The loop will check for those who meet the symptom definition and we will add the afebrile piece later. # First we need to cut a df that only has data from study days 1-10 Symptomatic_CR_exposed_grade123_day1to10 <- Symptomatic_CR_exposed_grade123 %>% filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) # First cut the the "Symptomatic_CR_exposed_grade123" df to the variables of interest and proper scale for the loop Symptomatic_CR_exposed_grade123_day1to10 <-Symptomatic_CR_exposed_grade123 %>% select(SubjectID, StudyDay, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) %>% filter(StudyDay == 1 |StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) sub <- unique(Symptomatic_CR_exposed_grade123_day1to10$SubjectID) c_sub <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- Symptomatic_CR_exposed_grade123_day1to10[Symptomatic_CR_exposed_grade123_day1to10$SubjectID == subid, ] temp1<-temp[,4:10] temp1[is.na(temp1)]<-0 temp<-cbind(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { if (temp[j, k] + temp[j+1, k] == 2){ if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { c_sub <- rbind(c_sub, subid) token<-1 break } } } if (token==1){ break } } } # This yields a c_sub with 12 subjectIDs, however if we want to use the more stringent criteria, where a Sx that appears before day 1 is removed from classification criteria, we use the following df and loop sub <- unique(Symptomatic_CR_exposed_grade123$SubjectID) c_sub2 <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- Symptomatic_CR_exposed_grade123[Symptomatic_CR_exposed_grade123$SubjectID == subid, ] temp1<-temp[,4:10] temp1[is.na(temp1)]<-0 temp<-cbind(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { if (temp[j, k] + temp[j+1, k] == 2){ if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { sum1<-0 for (m in 1:(tail(which(temp$StudyDay==0), n=1))) { sum1<-sum1+temp[m,k] } if (sum1==0) { token <- 1 c_sub2 <- rbind(c_sub2, subid) break } } } } if (token==1){ break } } } # This yields a c_sub2 of 10 subjectIDs (2 fewer than the less stringent criteria c_sub which yielded 12) # We will use the more stringent criteria for this classification # Rename "V1" as SubjectID from the c_sub2 vector of 12 Symptomatic_V3_CR_twodays <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") %>% distinct(SubjectID) # Combine the Symptomatic_donors_infected_singleday df and the Symptomatic_donors_infected_twodays df Symptomatic_V3_CR_combined <- Symptomatic_V3_CR_twodays %>% full_join(Symptomatic_V3_CR_singleday, by = c("SubjectID" = "SubjectID")) %>% arrange(SubjectID) # But the above definition of symptomatic doesn't make any mention of febrile illness # Let's check to see if the febrile are already accounted for among the group of symptomatic version 3 Symptomatic_by_fever_CR <- Symptomatic_CR_exposed_grade123 %>% filter(Febrile == 1) %>% select(SubjectID) %>% anti_join(Symptomatic_V3_CR_combined, by = c("SubjectID" = "SubjectID")) # Returned 0 subject IDs, thus adding fever to the analysis doesn't add anything. However we should still be clear about definitions for the paper Symptomatic_V3_CR_combined <- Symptomatic_V3_CR_combined %>% left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>% group_by(QuarantineNumber) %>% summarize(Number_Symptomatic_V3_CR = n_distinct(SubjectID)) # Add onto Table3_CR the number of symptomatic by version 3 criteria and % of infected Qdata_table3_CR <- Qdata_table3_CR %>% left_join(Symptomatic_V3_CR_combined, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_Symptomatic_V3_CR_of_ExposedCR = Number_Symptomatic_V3_CR/NumberExposedCR) print(Qdata_table3_CR) #### Table 3: d1c) IR: Third version of classification of ILI (to match Killingley et al., 2012) (and % of infected) #### # This definition of ILI is: "an illness lasting >=24 hours with either (1) fever >37.9°C plus at least 1 respiratory symptom or (2) >=2 symptoms, at least 1 of which must be respiratory." # Where "respiratory symptom" means evidence of any grade of runny nose, stuffy nose, sneeze, sore throat, cough, shortness of breath # Where "lasting >=24 hours" means evidence of the symptom over all three instances of symptom measurements for a single day, or evidence of the symptom over two days at any frequency (1-3/3 instances of symptom recordings) # First, let's program the first criteria (fever >37.9C plus at least 1 respiratory symptom) # To do this, we can: # a) create the set of subject IDs that meet the fever criteria, and then check them for # b) evidence of three instances during a single day, or # c) evidence of any frequency of instances >=1 for 2 consecutive days # Then, we can deal with the second criteria for ILI (>=2 symptoms one of which being a respiratory) # Find the SubjectIDs from among the exposed IR, that had fever # First check to see if anyone had fever before day 1. Qdata_IR_febrile_pre_day1 <- Qdata %>% filter(Randomization_DorIRorCR == "IR") %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0) %>% filter(Tympanic.temp..degrees.C. >37.9) %>% distinct(SubjectID, .keep_all = FALSE) # As it turns out, none of the IR had fever before day 1 # Now we can see who among the infected subject IDs had fever at least once over study days 1-10 Qdata_IR_febrile_day1to10 <- Qdata %>% filter(Randomization_DorIRorCR == "IR") %>% filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) %>% filter(Tympanic.temp..degrees.C. >37.9) %>% distinct(SubjectID, .keep_all = FALSE) # As it turns out, non of the IR ever had fever during study days 1-10 ILI_V3_IR_criteria1 <- Qdata_IR_febrile_day1to10 # Now we can move to the second criteria for ILI for the IR ## Plan for implementing the second criteria for ILI (>= 2 symptoms for >=24 hours, 1 of which is respiratory) and merging with the first criteria for ILI # To do this, first we will filter those subject IDs without fever. # Then we will see who among those without fever had respiratory symptom on a single day plus at least one other symptom on the same single day # Then we will see who among those without fever had respiratory sympomt at frequency >=1 over 2 days plus at least one other symptom at freq >=1 for same 2 days # Then we will add those subject IDs together to form ILI_IR_criteria2 # Since there were no subjects forming an ILI_IR_criteria1 df, we will use and ILI_IR_criteria2 together to make ILI_IR # First we will filter those subject IDs without fever. # Now we can see who among the exposed IR subject IDs had fever at least once over study days 1-10 Qdata_IR_afebrile_day1to10 <- Exposed_IR %>% select (SubjectID) %>% anti_join(Qdata_IR_febrile_day1to10) # Now we will see who among those without fever had respiratory symptom on a single day plus at least one other symptom on the same single day # First we are going to cut the a new df that has the 6 respiratory symptoms of interest plus the 3 non-resp symptoms, and fever # for only those in the "Qdata_IR_afebrile_day1to10" df ILI_V3_IR_afebrile_day1to10 <- Qdata_IR_afebrile_day1to10 %>% left_join(Qdata) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% filter(Microneut_VisitType == "Q baseline") %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123, headache123, muscleAches123, malaise123) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% distinct(SDC_time, .keep_all = TRUE) %>% arrange(SubjectID, StudyDay) %>% ungroup() # This is great but the way the data is put together, this leaves out the day -3 through day0 data # Therefore, as a quick fix, we will cut a new dataset that only filters in the data from day -3 through day0 # Then we will bind it back to the "Symptomatic_donors_infected_V3_afebrile" that was just created. ILI_V3_IR_afebrile_before_day1 <- Qdata_IR_afebrile_day1to10 %>% left_join(Qdata) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123, headache123, muscleAches123, malaise123) # Now binding together and sorting ILI_V3_IR_afebrile <- bind_rows(ILI_V3_IR_afebrile_day1to10, ILI_V3_IR_afebrile_before_day1) ILI_V3_IR_afebrile <- ILI_V3_IR_afebrile %>% arrange(SubjectID, StudyDay) # But, the current definition is just for post day0 so we will filter just day1-10 "ILI_V3_IR_afebrile_day1to10" df # Loop to get subjectIDs where there were 2 symptoms (one of which respiratory), each observed 3 times on the same day sub <- unique(ILI_V3_IR_afebrile_day1to10$SubjectID) c_sub <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- ILI_V3_IR_afebrile_day1to10[ILI_V3_IR_afebrile_day1to10$SubjectID == subid, ] temp1<-temp[,6:15] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { for (m in (l+1):15) { if (sum(temp2[,l]) + sum(temp2[,m]) == 6) { token <- 1 c_sub <- rbind(c_sub, subid) break } } if(token == 1) { break } } if(token == 1) { break } } if(token == 1) { break } } } # This yields a c_sub of 2 subjectIDs, but if we wanted to exclude symptoms that appeared before day 1 we could do a new loop sub <- unique(ILI_V3_IR_afebrile$SubjectID) c_sub2 <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- ILI_V3_IR_afebrile[ILI_V3_IR_afebrile$SubjectID == subid, ] temp1<-temp[,6:15] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { for (m in (l+1):15) { if (sum(temp2[,l]) + sum(temp2[,m]) == 6) { sum1<-0 sum2<-0 for (n in 1:(tail(which(temp$StudyDay==0), n=1))) { sum1<-sum1+temp[n,l] sum2<-sum2+temp[n,m] } if (sum1==0 & sum2==0) { token<-1 c_sub2 <- rbind(c_sub2, subid) break } } } if(token == 1) { break } } if(token == 1) { break } } if(token == 1) { break } } } # This c_sub2 vector holds 2 subjectIDs, just like the less stringent c_sub # For now we will take the more stringent criteria and move the 2 subjectIDs from c_sub2 into a df # Now get the df of subject IDs from c_sub2 ILI_V3_IR_criteria2_singleday <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") %>% distinct(SubjectID) # This is the output for the first part (single day) of the second ILI criteria # Now implement the second part of the second ILI criteria: >=2 Sx (with >=1 resp) at any frequency over the same 2 consecutive study days # Create an "IR_grade123_afebrile" df by collapsing the three study day values into 1, and select only the 9 symptoms that will be used as part of this analysis (the DPE aren't used here) # The 9 are: runny nose, stuffy nose, sneeze, sore throat, cough, SOB, headache, muscleache, malaise # Also remember to select the afebrile group (those who were never febrile, n = 40, which is actually all the IR) IR_grade123_afebrile_day1to10 <- ILI_V3_IR_afebrile_day1to10 %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% summarize(Febrile = max(Febrile), runnyNose123 = max(runnyNose123), stuffyNose123 = max(stuffyNose123), sneezing123 = max(sneezing123), soreThroat123 = max(soreThroat123), cough123 = max(cough123), SOB123 = max(SOB123), headache123 = max(headache123), muscleAches123 = max(muscleAches123), malaise123 = max(malaise123)) # Bind with the data that comes before day 1 to get complete dataset # Note that the data from before day 1 doesn't have to collapsed to a single measurement per day because we aren't scanning these in the criteria # Rather we are interested in seeing if there were any symtoms before day 1 to implement a more stringent criteria for classifying symptoms ILI_V3_IR_grade123_afebrile <- bind_rows(IR_grade123_afebrile_day1to10, ILI_V3_IR_afebrile_before_day1) ILI_V3_IR_grade123_afebrile <- ILI_V3_IR_grade123_afebrile %>% select(SubjectID, StudyDay, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123, headache123, muscleAches123, malaise123) %>% arrange(SubjectID, StudyDay) # Columns 5-10 are respiratory symptoms and columns 11-13 are the other symptoms that matter for this definition # Do a loop for 2 or more symptoms one of which is respiratory on with the Sx's occuring on 2 consecutive days at any frequency >=1 sub <- unique(IR_grade123_afebrile_day1to10$SubjectID) c_sub <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- IR_grade123_afebrile_day1to10[IR_grade123_afebrile_day1to10$SubjectID == subid, ] temp1<-temp[,4:13] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { for (l in (k+1):13){ if (temp[j, k] + temp[j, l] + temp[j+1, k] + temp[j+1, l] == 4) { if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { c_sub <- rbind(c_sub, subid) token<-1 break } } } if (token==1){ break } } if (token==1){ break } } } # This yields a c_sub vector of 5 subjectIDs, however if we want to exclude Sx that occured before day0 we use a new loop sub <- unique(ILI_V3_IR_grade123_afebrile$SubjectID) c_sub2 <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- ILI_V3_IR_grade123_afebrile[ILI_V3_IR_grade123_afebrile$SubjectID == subid, ] temp1<-temp[,4:13] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { for (l in (k+1):13){ if (temp[j, k] + temp[j, l] + temp[j+1, k] + temp[j+1, l] == 4) { if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { sum1<-0 sum2<-0 for (m in 1:(tail(which(temp$StudyDay==-1), n=1))){ sum1<-sum1+temp[m,k] sum2<-sum1+temp[m,l] } if (sum1==0 & sum2==0) { c_sub2 <- rbind(c_sub2, subid) token<-1 break } } } } if (token==1){ break } } if (token==1){ break } } } # This yields the same 5 subjectIDs as c_sub # For now we will use the more stringent criteria (c_sub2) and thus convert c_sub2 into a df for future manipulation ILI_V3_IR_criteria2 <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") %>% distinct(SubjectID) # merge together the ILI criteria 1 and 2 dfs ILI_V3_IR <- full_join(ILI_V3_IR_criteria1, ILI_V3_IR_criteria2) # Now adding the QuarantineNumber on to the ILI df # Then we can sort by Q for the table1 ILI_V3_IR_table3 <- ILI_V3_IR %>% left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>% group_by(QuarantineNumber) %>% summarize(Number_ILI_V3_IR = n_distinct(SubjectID)) # Add onto Table3_IR the number of symptomatic by version 3 criteria and % of infected Qdata_table3_IR <- Qdata_table3_IR %>% left_join(ILI_V3_IR_table3, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_ILI_V3_IR_of_ExposedIR = Number_ILI_V3_IR/NumberExposedIR) print(Qdata_table3_IR) #### Table 3: d1c) CR: Third version of classification of ILI (to match Killingley et al., 2012) (and % of infected) #### # This definition of ILI is: "an illness lasting >=24 hours with either (1) fever >37.9°C plus at least 1 respiratory symptom or (2) >=2 symptoms, at least 1 of which must be respiratory." # Where "respiratory symptom" means evidence of any grade of runny nose, stuffy nose, sneeze, sore throat, cough, shortness of breath # Where "lasting >=24 hours" means evidence of the symptom over all three instances of symptom measurements for a single day, or evidence of the symptom over two days at any frequency (1-3/3 instances of symptom recordings) # First, let's program the first criteria (fever >37.9C plus at least 1 respiratory symptom) # To do this, we can: # a) create the set of subject IDs that meet the fever criteria, and then check them for... # b) evidence of three instances during a single day, or # c) evidence of any frequency of instances >=1 for 2 consecutive days # Then, we can deal with the second criteria for ILI (>=2 symptoms one of which being a respiratory) # Find the SubjectIDs from among the exposed CR, that had fever # First check to see if anyone had fever before day 1. Qdata_CR_febrile_pre_day1 <- Qdata %>% filter(Randomization_DorIRorCR == "CR") %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0) %>% filter(Tympanic.temp..degrees.C. >37.9) %>% distinct(SubjectID, .keep_all = FALSE) # As it turns out, none of the CR had fever before day 1 # Now we can see who among the infected subject IDs had fever at least once over study days 1-10 Qdata_CR_febrile_day1to10 <- Qdata %>% filter(Randomization_DorIRorCR == "CR") %>% filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) %>% filter(Tympanic.temp..degrees.C. >37.9) %>% distinct(SubjectID, .keep_all = FALSE) # As it turns out, none of the CR ever had fever during study days 1-10 ILI_V3_CR_criteria1 <- Qdata_CR_febrile_day1to10 # Now we can move to the second criteria for ILI for the CR ## Plan for implementing the second criteria for ILI (>= 2 symptoms for >=24 hours, 1 of which is respiratory) and merging with the first criteria for ILI # To do this, first we will filter those subject IDs without fever. # Then we will see who among those without fever had respiratory symptom on a single day plus at least one other symptom on the same single day # Then we will see who among those without fever had respiratory sympomt at frequency >=1 over 2 days plus at least one other resp symptom at freq >=1 for same 2 days # Then we will add those subject IDs together to form ILI_CR_criteria2 # Since there were no subjects forming an ILI_CR_criteria1 df, we will use and ILI_CR_criteria2 together to make ILI_CR # First we will filter those subject IDs without fever. # Now we can see who among the exposed CR subject IDs had fever at least once over study days 1-10 Qdata_CR_afebrile_day1to10 <- Exposed_CR %>% select (SubjectID) %>% anti_join(Qdata_CR_febrile_day1to10) # Now we will see who among those without fever had respiratory symptom on a single day plus at least one other respiratory symptom on the same single day # First we are going to cut the a new df that has the 6 respiratory symptoms of interest plus the 3 non-resp symptoms, and fever # for only those in the "Qdata_CR_afebrile_day1to6" df ILI_V3_CR_afebrile_day1to10 <- Qdata_CR_afebrile_day1to10 %>% left_join(Qdata) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% filter(Microneut_VisitType == "Q baseline") %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123, headache123, muscleAches123, malaise123) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% distinct(SDC_time, .keep_all = TRUE) %>% arrange(SubjectID, StudyDay) %>% ungroup() # This is great but the way the data is put together, this leaves out the day -3 through day0 data # Therefore, as a quick fix, we will cut a new dataset that only filters in the data from day -3 through day0 # Then we will bind it back to the "Symptomatic_donors_infected_V3_afebrile" that was just created. ILI_V3_CR_before_day1_afebrile <- Qdata_CR_afebrile_day1to10 %>% left_join(Qdata) %>% filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0) %>% mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, LRI = cough+SOB, SystemicI = headache+muscleAches+malaise) %>% mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>% mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3, DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3, cough123 = cough==1 | cough==2 | cough==3, SOB123 = SOB==1 | SOB==2 | SOB==3, headache123 = headache==1 | headache==2 | headache==3, muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, malaise123 = malaise==1 | malaise==2 | malaise==3) %>% mutate(Febrile = as.numeric(Febrile), runnyNose123 = as.numeric(runnyNose123), stuffyNose123 = as.numeric(stuffyNose123), sneezing123 = as.numeric(sneezing123), soreThroat123 = as.numeric(soreThroat123), DPENasalDischarge123 = as.numeric(DPENasalDischarge123), DPEOtits123 = as.numeric(DPEOtits123), DPESinusTenderness123 = as.numeric(DPESinusTenderness123), DPEPharyngitis123 = as.numeric(DPEPharyngitis123), cough123 = as.numeric(cough123), SOB123 = as.numeric(SOB123), headache123 = as.numeric(headache123), muscleAches123 = as.numeric(muscleAches123), malaise123 = as.numeric(malaise123)) %>% select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123, headache123, muscleAches123, malaise123) %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% distinct(SDC_time, .keep_all = TRUE) %>% arrange(SubjectID, StudyDay) %>% ungroup() # Now binding together and sorting ILI_V3_CR_afebrile <- bind_rows(ILI_V3_CR_afebrile_day1to10, ILI_V3_CR_before_day1_afebrile) ILI_V3_CR_afebrile <- ILI_V3_CR_afebrile %>% arrange(SubjectID, StudyDay) # But, the current definition is just for post day0 so we will filter just day1-10: use "ILI_V3_CR_afebrile_day1to10" df # Loop to get subjectIDs where there were 2 symptoms (one of which respiratory), each observed 3 times on the same day sub <- unique(ILI_V3_CR_afebrile_day1to10$SubjectID) c_sub <- c() token <- 0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- ILI_V3_CR_afebrile_day1to10[ILI_V3_CR_afebrile_day1to10$SubjectID == subid, ] temp1<-temp[,6:15] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { for (m in (l+1):15) { if (sum(temp2[,l]) + sum(temp2[,m]) == 6) { c_sub <- rbind(c_sub, subid) token <- 1 break } } if(token == 1) { break } } if(token == 1) { break } } if(token == 1) { break } } } # This yields a c_sub vector of 3 subjectIDs # If we wanted to add the more stringent criteria of eliminating Sx that occurred before day1 from the classification criteria sub <- unique(ILI_V3_CR_afebrile$SubjectID) c_sub2 <- c() token <- 0 sum1<-0 sum2<-0 for (i in 1:length(sub)) { token <- 0 subid <- sub[i] temp <- ILI_V3_CR_afebrile[ILI_V3_CR_afebrile$SubjectID == subid, ] temp1<-temp[,6:15] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:5],temp1) stud <- unique(temp$StudyDay) for (j in 1:length(stud)) { studyday <- stud[j] temp2 <- temp[temp$StudyDay == studyday, ] for (k in 1:(nrow(temp2))) { for (l in 7:12) { for (m in (l+1):15) { if (sum(temp2[,l]) + sum(temp2[,m]) == 6) { sum1<-0 sum2<-0 for (o in 1:(tail(which(temp$StudyDay==0), n=1))) { sum1<-sum1+temp[o,l] sum2<-sum2+temp[o,m] } if (sum1==0 & sum2==0) { token<-1 c_sub2 <- rbind(c_sub2, subid) break } } if(token == 1) { break } } if(token == 1) { break } } if(token == 1) { break } } } } # This yields a c_sub2 vector of 3 subjectIDs, which is is the same as using the less stringent criteria, # For now we will use the more stringent criteria # Now get the df of subject IDs from the less stringent c_sub2 ILI_V3_infected_donors_criteria2_singleday <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") %>% distinct(SubjectID) # This is the output for the first part (single day) of the second ILI criteria # Now implement the second part of the second ILI criteria: >=2 Sx (with >=1 resp) at any frequency over the same 2 consecutive study days # Create an "CR_grade123_afebrile" df by collapsing the three study day values into 1, and select only the 9 symptoms that will be used as part of this analysis (the DPE aren't used here) # The 9 are: runny nose, stuffy nose, sneeze, sore throat, cough, SOB, headache, muscleache, malaise # Also remember to select the afebrile group (those who were never febrile, n = 35, which is actually all the CR) CR_grade123_afebrile <- ILI_V3_CR_afebrile %>% group_by(SubjectID, StudyDay, QuarantineNumber) %>% summarize(Febrile = max(Febrile), runnyNose123 = max(runnyNose123), stuffyNose123 = max(stuffyNose123), sneezing123 = max(sneezing123), soreThroat123 = max(soreThroat123), cough123 = max(cough123), SOB123 = max(SOB123), headache123 = max(headache123), muscleAches123 = max(muscleAches123), malaise123 = max(malaise123)) # Columns 5-10 are respiratory symptoms and columns 11-13 are the other symptoms that matter for this definition # Do a loop for 2 or more symptoms one of which is respiratory # First do this without using symptoms that were positive before day 1 as part of the classification criteria # First need to cut the data to just study days 1-10 CR_grade123_afebrile_day1to10 <- CR_grade123_afebrile %>% filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) # Now grabbing the subjectIDs that meet the criteria for ILI over 2 consecutive study days (with at least 1 Sx as Resp) sub <- unique(CR_grade123_afebrile_day1to10$SubjectID) c_sub <- c() token<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- CR_grade123_afebrile_day1to10[CR_grade123_afebrile_day1to10$SubjectID == subid, ] temp1<-temp[,4:13] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { for (l in (k+1):13){ if (temp[j, k] + temp[j, l] + temp[j+1, k] + temp[j+1, l] == 4) { if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { c_sub <- rbind(c_sub, subid) token<-1 break } } } if (token==1){ break } } if (token==1){ break } } } # This yields a c_sub of 9 subjectIDs but if we wanted to be more stringent we could exclude the Sx if they occured before study day 1 from the classification criteria sub <- unique(CR_grade123_afebrile$SubjectID) c_sub2 <- c() token<-0 sum1<-0 sum2<-0 for (i in 1:length(sub)) { token<-0 subid <- sub[i] temp <- CR_grade123_afebrile[CR_grade123_afebrile$SubjectID == subid, ] temp1<-temp[,4:13] temp1[is.na(temp1)]<-0 temp<-bind_cols(temp[,1:3],temp1) for (j in 1:(nrow(temp)-1)) { for (k in 5:10) { for (l in (k+1):13){ if (temp[j, k] + temp[j, l] + temp[j+1, k] + temp[j+1, l] == 4) { if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) { sum1<-0 sum2<-0 for (m in 1:(tail(which(temp$StudyDay==-1), n=1))){ sum1<-sum1+temp[m,k] sum2<-sum1+temp[m,l] } if (sum1==0 & sum2==0) { c_sub2 <- rbind(c_sub2, subid) token<-1 break } } } } if (token==1){ break } } if (token==1){ break } } } # This yields a c_sub2 of 8 subjectIDs (1 less than the c_sub) # We will go with the less stringent criteria for now and use the c_sub2 # Get this ILI second criteria bit into a df using the less stringent c_sub2 vector ILI_V3_CR_criteria2_2days <- as.data.frame(c_sub2) %>% rename(SubjectID = "V1") %>% distinct(SubjectID) # merge the ILI_V3_infected_donors_criteria2_singleday and ILI_V3_CR_criteria2_2days dfs ILI_V3_CR_criteria2 <- full_join(ILI_V3_infected_donors_criteria2_singleday, ILI_V3_CR_criteria2_2days) # merge the ILI criteria 1 and 2 dfs ILI_V3_CR <- full_join(ILI_V3_CR_criteria1, ILI_V3_CR_criteria2) # Now adding the QuarantineNumber on to the ILI df # Then we can sort by Q for the table1 ILI_CR_V3_table3 <- ILI_V3_CR %>% left_join(Qdata_QuarantineNumbers) %>% group_by(QuarantineNumber) %>% summarize(Number_ILI_CR_V3 = n_distinct(SubjectID)) # Add onto Table3_CR the number of symptomatic by version 3 criteria and % of infected Qdata_table3_CR <- Qdata_table3_CR %>% left_join(ILI_CR_V3_table3, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_ILI_V3_CR_of_ExposedCR = Number_ILI_CR_V3/NumberExposedCR) print(Qdata_table3_CR) #### Table 3: e1) IR: Number of febrile (and % of exposed) #### # Use the list of exposed IR to do this analysis Qdata_exposed_febrile_IR <- Qdata %>% filter(Randomization_DorIRorCR == "IR" & Tympanic.temp..degrees.C. >37.9) Qdata_exposed_febrile_table3_IR <- Qdata_exposed_febrile_IR %>% group_by(QuarantineNumber) %>% summarize(Number_Febrile_IR = n_distinct(SubjectID)) # Add febrile count and fraction febrile to Table 3 Qdata_table3_IR <- Qdata_table3_IR %>% left_join(Qdata_exposed_febrile_table3_IR, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_Febrile_over_ExposedIR = Number_Febrile_IR/NumberExposedIR) #### Table 3: e2) CR: Number of febrile (and % of exposed) #### # Use the list of exposed CR to do this analysis Qdata_exposed_febrile_CR <- Qdata %>% filter(Randomization_DorIRorCR == "CR" & Tympanic.temp..degrees.C. >37.9) Qdata_exposed_febrile_table3_CR <- Qdata_exposed_febrile_CR %>% group_by(QuarantineNumber) %>% summarize(Number_Febrile_CR = n_distinct(SubjectID)) # Add febrile count and fraction febrile to Table 3 Qdata_table3_CR <- Qdata_table3_CR %>% left_join(Qdata_exposed_febrile_table3_CR, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_Febrile_over_ExpCR = Number_Febrile_CR/NumberExposedCR) #### Table 3: f1) IR: Number of PCR confirmed infection (and % of exposed) #### # This was already done to get the number of infected donors for the first few columns in this Table 3 # Redo what was done earlier, but tweaking for the purpose of this column in the table 3 # Get list of SubjectID and the number of days each was positive by PCR Qdata_pcr_pos2_or_more_days_IR <- Qdata %>% filter(Randomization_DorIRorCR == "IR") %>% filter(!is.na(InfA_Ct)) %>% filter((InfA_Ct<38 & InfA_Ct!=0)) %>% group_by(SubjectID, StudyDay) %>% summarize(count = n()) %>% summarize(NumberDaysPosPCR_IR = n_distinct(StudyDay)) %>% filter(NumberDaysPosPCR >=2) print(Qdata_pcr_pos2_or_more_days_IR) # Add the Q numbers to the list of SubjectIDs and the number of PCR positive days and summarize by Q # Note: there was not data on which to add Q numbers here. Running code anyways because to deal with generalized case Qdata_pcr_pos2_or_more_days_table3_IR <- Qdata_pcr_pos2_or_more_days_IR %>% left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>% group_by(QuarantineNumber) %>% summarize(Number_PCR_Infected_IR = n_distinct(SubjectID)) print(Qdata_pcr_pos2_or_more_days_table3_IR) # Add to table3_IR Qdata_table3_IR <- Qdata_table3_IR %>% left_join(Qdata_pcr_pos2_or_more_days_table3_IR, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_PCR_Infected_over_ExposedIR = Number_PCR_Infected_IR/NumberExposedIR) print(Qdata_table3_IR) #### Table 3: f2) CR: Number of PCR confirmed infection (and % of exposed) #### # This was already done to get the number of infected donors for the first few columns in this Table 3 # Redo what was done earlier, but tweaking for the purpose of this column in the table 3 # Get list of SubjectID and the number of days each was positive by PCR Qdata_pcr_pos2_or_more_days_CR <- Qdata %>% filter(Randomization_DorIRorCR == "CR") %>% filter(!is.na(InfA_Ct)) %>% filter((InfA_Ct<38 & InfA_Ct!=0)) %>% group_by(SubjectID, StudyDay) %>% summarize(count = n()) %>% summarize(NumberDaysPosPCR_CR = n_distinct(StudyDay)) %>% filter(NumberDaysPosPCR_CR >=2) print(Qdata_pcr_pos2_or_more_days_CR) # Add the Q numbers to the list of SubjectIDs and the number of PCR positive days and summarize by Q Qdata_pcr_pos2_or_more_days_table3_CR <- Qdata_pcr_pos2_or_more_days_CR %>% left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>% group_by(QuarantineNumber) %>% summarize(Number_PCR_Infected_CR = n_distinct(SubjectID)) print(Qdata_pcr_pos2_or_more_days_table3_CR) # Add to table3_IR Qdata_table3_CR <- Qdata_table3_CR %>% left_join(Qdata_pcr_pos2_or_more_days_table3_CR, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_PCR_Inf_over_ExpCR = Number_PCR_Infected_CR/NumberExposedCR) print(Qdata_table3_CR) #### Table 3: g1) IR: Number of PCR confirmed infection and seroconversion (and % of exposed) #### # This was already done to get the number of infected IR for the first few columns in this Table 3 Inf_PCR_and_Sero_IR <- Qdata_infected_IR %>% filter(NumberDaysPosPCR_IR >=2) %>% filter(!is.na(QuarantineNumber.x) | !is.na(QuarantineNumber.y)) #Now summarize the number of unique SubjectIDs that meet this criteria by Q for the table Inf_PCR_and_Sero_table_IR <- Inf_PCR_and_Sero_IR %>% group_by(QuarantineNumber) %>% summarize(Number_PCR_and_Sero_Positive_IR = n_distinct(SubjectID)) # Now add Inf_PCR_and_Sero_table_IR to the cumulative table 3 Qdata_table3_IR <- Qdata_table3_IR %>% left_join(Inf_PCR_and_Sero_table_IR, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_Inf_PCR_and_Sero_Positive_of_ExposedIR = Number_PCR_and_Sero_Positive_IR/NumberExposedIR) #### Table 3: g2) CR: Number of PCR confirmed infection and seroconversion (and % of exposed) #### # This was already done to get the number of infected CR for the first few columns in this Table 3 Inf_PCR_and_Sero_CR <- Qdata_infected_CR %>% filter(NumberDaysPosPCR_CR >=2) %>% filter(!is.na(QuarantineNumber.x) | !is.na(QuarantineNumber.y)) #Now summarize the number of unique SubjectIDs that meet this criteria by Q for the table Inf_PCR_and_Sero_table3_CR <- Inf_PCR_and_Sero_CR %>% group_by(QuarantineNumber) %>% summarize(Number_PCR_and_Sero_Positive_CR = n_distinct(SubjectID)) # Now add Inf_PCR_and_Sero_table1 to the cumulative table 3 Qdata_table3_CR <- Qdata_table3_CR %>% left_join(Inf_PCR_and_Sero_table3_CR, by = c("QuarantineNumber" = "QuarantineNumber")) %>% mutate(Fraction_Inf_PCR_and_Sero_Positive_CR = Number_PCR_and_Sero_Positive_CR/NumberExposedCR) #### Table 3: h1) IR: Number of seroconversion by HAI: MN: Either (and % of exposed) #### # This was already done to get the number of infected IR for the first few columns in this Table 1 # Reworking here to tailor the current Table 3 columns in question ## HAI # Qdata_HAI_pos is the list (generated in section a) above) with seroconversion by HAI (Glasgow serology) # Group by Q day and summarize number of distinct SubjectIDs Qdata_HAI_pos_table3_IR <- Qdata_HAI_pos_IR %>% group_by(QuarantineNumber) %>% summarize(Number_HAI_Positive_IR = n_distinct(SubjectID)) # Add HAI_pos column to the table (and % HAI_pos of infected) # Don't need the percentage here for final version of table 3 so commenting out the mutate Qdata_table3_IR <- Qdata_table3_IR %>% left_join(Qdata_HAI_pos_table3_IR, by = c("QuarantineNumber" = "QuarantineNumber")) #%>% #mutate(Fraction_HAI_Positive_of_ExposedIR = Number_HAI_Positive_IR/NumberExposedIR) ## Microneuts # Qdata_Microneut_pos is the list (generated in section a) above) with seroconversion by Microneuts (CDC serology) # Group by Q day and summarize number of distinct SubjectIDs Qdata_Microneut_pos_table3_IR <- Qdata_Microneut_pos_IR %>% group_by(QuarantineNumber) %>% summarize(Number_Microneut_Positive_IR = n_distinct(SubjectID)) # Add Microneut_pos column to the table (and % Microneut_pos of infected) # Don't need the percentage here for final version of table 3 so commenting out the mutate Qdata_table3_IR <- Qdata_table3_IR %>% left_join(Qdata_Microneut_pos_table3_IR, by = c("QuarantineNumber" = "QuarantineNumber")) #%>% #mutate(Fraction_MN_Positive_of_ExposedIR = Number_Microneut_Positive_IR/NumberExposedIR) ## Either HAI or MN # Already have this generated in the Exposed_IR df Pos_Either_HAI_or_MN_table3_IR <- Qdata_infected_IR %>% filter(!is.na(QuarantineNumber.x) | !is.na(QuarantineNumber.y)) %>% group_by(QuarantineNumber) %>% summarize(Pos_Either_HAI_or_MN_IR = n_distinct(SubjectID)) # Add Pos_Either_HAI_or_MN_table3 to the cumulative Qdata_table3 # Don't need the percentage here for final version of table 3 so commenting out the mutate Qdata_table3_IR <- Qdata_table3_IR %>% left_join(Pos_Either_HAI_or_MN_table3_IR, by = c("QuarantineNumber" = "QuarantineNumber")) #%>% #mutate(Fraction_Pos_Either_HAI_or_MN_of_ExpIR = Pos_Either_HAI_or_MN_IR/NumberExposedIR) #### Table 3: h2) CR: Number of seroconversion by HAI: MN: Either (and % of exposed) #### # This was already done to get the number of infected CR for the first few columns in this Table 1 # Reworking here to tailor the current Table 3 columns in question ## HAI # Qdata_HAI_pos is the list (generated in section a) above) with seroconversion by HAI (Glasgow serology) # Group by Q day and summarize number of distinct SubjectIDs Qdata_HAI_pos_table3_CR <- Qdata_HAI_pos_CR %>% group_by(QuarantineNumber) %>% summarize(Number_HAI_Positive_CR = n_distinct(SubjectID)) print(Qdata_HAI_pos_table3_CR) # Add HAI_pos column to the table (and % HAI_pos of infected) # Don't need the percentage here for final version of table 3 so commenting out the mutate Qdata_table3_CR <- Qdata_table3_CR %>% left_join(Qdata_HAI_pos_table3_CR, by = c("QuarantineNumber" = "QuarantineNumber")) #%>% #mutate(Fraction_HAI_Positive_of_ExposedCR = Number_HAI_Positive_CR/NumberExposedCR) print(Qdata_table3_CR) ## Microneuts # Qdata_Microneut_pos is the list (generated in section a) above) with seroconversion by Microneuts (CDC serology) # Group by Q day and summarize number of distinct SubjectIDs Qdata_Microneut_pos_table3_CR <- Qdata_Microneut_pos_CR %>% group_by(QuarantineNumber) %>% summarize(Number_Microneut_Positive_CR = n_distinct(SubjectID)) print(Qdata_Microneut_pos_table3_CR) # Add Microneut_pos column to the table (and % Microneut_pos of infected) # Don't need the percentage here for final version of table 3 so commenting out the mutate Qdata_table3_CR <- Qdata_table3_CR %>% left_join(Qdata_Microneut_pos_table3_CR, by = c("QuarantineNumber" = "QuarantineNumber")) #%>% #mutate(Fraction_MN_Positive_of_ExposedCR = Number_Microneut_Positive_CR/NumberExposedCR) print(Qdata_table3_CR) ## Either HAI or MN # Already have this generated in the Exposed_CR df Pos_Either_HAI_or_MN_table3_CR <- Qdata_infected_CR %>% filter(!is.na(QuarantineNumber.x) | !is.na(QuarantineNumber.y)) %>% group_by(QuarantineNumber) %>% summarize(Positive_Either_HAI_or_MN_CR = n_distinct(SubjectID)) print(Pos_Either_HAI_or_MN_table3_CR) # Add Pos_Either_HAI_or_MN_table3 to the cumulative Qdata_table3 # Don't need the percentage here for final version of table 3 so commenting out the mutate Qdata_table3_CR <- Qdata_table3_CR %>% left_join(Pos_Either_HAI_or_MN_table3_CR, by = c("QuarantineNumber" = "QuarantineNumber")) #%>% #mutate(Fraction_Positive_Either_HAI_or_MN_of_ExposedCR = Positive_Either_HAI_or_MN_CR/NumberExposedCR) #### Column means, sums and rounding for Table 3_IR #### ## IR # First convert all the NAs to 0. Qdata_table3_IR[is.na(Qdata_table3_IR)] <- 0 table3_IR_manuscript <- Qdata_table3_IR # use this as a backup because it is arduous to recreate Qdata_table1 table3_IR_manuscript_sums <- table3_IR_manuscript %>% summarise_all(funs(sum)) table3_IR_manuscript_sums <- table3_IR_manuscript %>% full_join(table3_IR_manuscript_sums) # This gets all of the columns to be summed # Now let's work on changing the sums in all of the "percent" columns to the proper fraction # (also perform the *100 transformation to percent) # (also round everything to the nearest whole number) table3_IR_manuscript_sums_fractions <- table3_IR_manuscript_sums %>% mutate(Fraction_Inf_over_ExpIR = (NumberInfectedIR/NumberExposedIR)*100, Fraction_IR_Symptomatic_V3_of_ExposedIR = (Number_IR_Symptomatic_V3/NumberExposedIR)*100, Fraction_ILI_V3_IR_of_ExposedIR = (Number_ILI_V3_IR/NumberExposedIR)*100, Fraction_Febrile_over_ExposedIR = (Number_Febrile_IR/NumberExposedIR)*100, Fraction_PCR_Infected_over_ExposedIR = (Number_PCR_Infected_IR/NumberExposedIR)*100, Fraction_Inf_PCR_and_Sero_Positive_of_ExposedIR = (Number_PCR_and_Sero_Positive_IR/NumberExposedIR)*100) %>% mutate_all(funs(round(., 0))) # Now add parentheses to all of these fraction variables because they are presented in manuscript table as percents table3_IR_manuscript_sums_fractions$Fraction_Inf_over_ExpIR <- paste0("(", table3_IR_manuscript_sums_fractions$Fraction_Inf_over_ExpIR, ")") table3_IR_manuscript_sums_fractions$Fraction_IR_Symptomatic_V3_of_ExposedIR <- paste0("(", table3_IR_manuscript_sums_fractions$Fraction_IR_Symptomatic_V3_of_ExposedIR, ")") table3_IR_manuscript_sums_fractions$Fraction_ILI_V3_IR_of_ExposedIR <- paste0("(", table3_IR_manuscript_sums_fractions$Fraction_ILI_V3_IR_of_ExposedIR, ")") table3_IR_manuscript_sums_fractions$Fraction_Febrile_over_ExposedIR <- paste0("(", table3_IR_manuscript_sums_fractions$Fraction_Febrile_over_ExposedIR, ")") table3_IR_manuscript_sums_fractions$Fraction_PCR_Infected_over_ExposedIR <- paste0("(", table3_IR_manuscript_sums_fractions$Fraction_PCR_Infected_over_ExposedIR, ")") table3_IR_manuscript_sums_fractions$Fraction_Inf_PCR_and_Sero_Positive_of_ExposedIR <- paste0("(", table3_IR_manuscript_sums_fractions$Fraction_Inf_PCR_and_Sero_Positive_of_ExposedIR, ")") # Now bring columns together into more publishable arrangement of data in the display of the table table3_IR_manuscript_unite <- table3_IR_manuscript_sums_fractions %>% unite(`Infected/Exposed`, NumberInfectedIR, NumberExposedIR, sep = "/", remove = TRUE) %>% unite(`Infected/Exposed (%)`, `Infected/Exposed`, Fraction_Inf_over_ExpIR, sep = " ", remove = TRUE) %>% unite(Symptomatic, Number_IR_Symptomatic_V3, Fraction_IR_Symptomatic_V3_of_ExposedIR, sep = " ", remove = TRUE) %>% unite(ILI, Number_ILI_V3_IR, Fraction_ILI_V3_IR_of_ExposedIR, sep = " ", remove = TRUE) %>% unite(Febrile, Number_Febrile_IR, Fraction_Febrile_over_ExposedIR, sep = " ", remove = TRUE) %>% unite(`PCR Confirmed Infection`, Number_PCR_Infected_IR, Fraction_PCR_Infected_over_ExposedIR, sep = " ", remove = TRUE) %>% unite(`PCR Confirmed Infection and Seroconversion`, Number_PCR_and_Sero_Positive_IR, Fraction_Inf_PCR_and_Sero_Positive_of_ExposedIR, sep = " ", remove = TRUE) %>% unite(`Seroconversion by HAI : MN : Either`, Number_HAI_Positive_IR, Number_Microneut_Positive_IR, Pos_Either_HAI_or_MN_IR, sep = " : ", remove = TRUE) %>% rename('Quarantine #' = QuarantineNumber) table3_IR_manuscript_unite <- table3_IR_manuscript_unite[,c(1:3,5,4,6:8)] # Change the 4th row of the Quarantine # column to "Total" table3_IR_manuscript_unite$`Quarantine #`[4] <- "Total" # Add a column that indicates in each row the recipient classification (Intervention (IR)) # We need to do this in order to keep the data organized when we merge with the CR group to make 1 table 3 table3_IR_manuscript_unite$`Recipient Classification` <- "Intervention (IR)" SI_table3_IR_manuscript_unite <- table3_IR_manuscript_unite[,c(1,9,2:8)] #### Writing out Table 3_IR to box sync directory #### write.csv(SI_table3_IR_manuscript_unite, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_prior_sympt_Table3_IR_Manuscript.csv") #### Writing out Table 3_IR to latex for direct translation of code to table image for paper #### kable(SI_table3_IR_manuscript_unite) %>% kable_styling("striped") %>% add_header_above(c(" " = 3, "Laboratory Confirmed Infection & Illness (% of Exposed)" = 3, "Laboratory Confirmed Infection Criteria (% of Exposed)" = 3)) datatable(SI_table3_IR_manuscript_unite, filter = 'top', options = list( pageLength = 10, autoWidth = TRUE )) print(xtable(SI_table3_IR_manuscript_unite), comment = FALSE) #### Column means, sums and rounding for Table 3_CR #### ## CR # First convert all the NAs to 0. Qdata_table3_CR[is.na(Qdata_table3_CR)] <- 0 table3_CR_manuscript <- Qdata_table3_CR # use this as a backup because it is arduous to recreate Qdata_table1 table3_CR_manuscript_sums <- table3_CR_manuscript %>% summarise_all(funs(sum)) table3_CR_manuscript_sums <- table3_CR_manuscript %>% full_join(table3_CR_manuscript_sums) # This gets all of the columns to be summed # Now let's work on changing the sums in all of the "percent" columns to the proper fraction # (also perform the *100 transformation to percent) # (also round everything to the nearest whole number) table3_CR_manuscript_sums_fractions <- table3_CR_manuscript_sums %>% mutate(Fraction_Inf_over_ExpCR = (NumberInfectedCR/NumberExposedCR)*100, Fraction_Symptomatic_V3_CR_of_ExposedCR = (Number_Symptomatic_V3_CR/NumberExposedCR)*100, Fraction_ILI_V3_CR_of_ExposedCR = (Number_ILI_CR_V3/NumberExposedCR)*100, Fraction_Febrile_over_ExpCR = (Number_Febrile_CR/NumberExposedCR)*100, Fraction_PCR_Inf_over_ExpCR = (Number_PCR_Infected_CR/NumberExposedCR)*100, Fraction_Inf_PCR_and_Sero_Positive_CR = (Number_PCR_and_Sero_Positive_CR/NumberExposedCR)*100) %>% mutate_all(funs(round(., 0))) # Now add parentheses to all of these fraction variables because they are presented in manuscript table as percents table3_CR_manuscript_sums_fractions$Fraction_Inf_over_ExpCR <- paste0("(", table3_CR_manuscript_sums_fractions$Fraction_Inf_over_ExpCR, ")") table3_CR_manuscript_sums_fractions$Fraction_Symptomatic_V3_CR_of_ExposedCR <- paste0("(", table3_CR_manuscript_sums_fractions$Fraction_Symptomatic_V3_CR_of_ExposedCR, ")") table3_CR_manuscript_sums_fractions$Fraction_ILI_V3_CR_of_ExposedCR <- paste0("(", table3_CR_manuscript_sums_fractions$Fraction_ILI_V3_CR_of_ExposedCR, ")") table3_CR_manuscript_sums_fractions$Fraction_Febrile_over_ExpCR <- paste0("(", table3_CR_manuscript_sums_fractions$Fraction_Febrile_over_ExpCR, ")") table3_CR_manuscript_sums_fractions$Fraction_PCR_Inf_over_ExpCR <- paste0("(", table3_CR_manuscript_sums_fractions$Fraction_PCR_Inf_over_ExpCR, ")") table3_CR_manuscript_sums_fractions$Fraction_Inf_PCR_and_Sero_Positive_CR <- paste0("(", table3_CR_manuscript_sums_fractions$Fraction_Inf_PCR_and_Sero_Positive_CR, ")") # Now bring columns together into more publishable arrangement of data in the display of the table table3_CR_manuscript_unite <- table3_CR_manuscript_sums_fractions %>% unite(`Infected/Exposed`, NumberInfectedCR, NumberExposedCR, sep = "/", remove = TRUE) %>% unite(`Infected/Exposed (%)`, `Infected/Exposed`, Fraction_Inf_over_ExpCR, sep = " ", remove = TRUE) %>% unite(Symptomatic, Number_Symptomatic_V3_CR, Fraction_Symptomatic_V3_CR_of_ExposedCR, sep = " ", remove = TRUE) %>% unite(ILI, Number_ILI_CR_V3, Fraction_ILI_V3_CR_of_ExposedCR, sep = " ", remove = TRUE) %>% unite(Febrile, Number_Febrile_CR, Fraction_Febrile_over_ExpCR, sep = " ", remove = TRUE) %>% unite(`PCR Confirmed Infection`, Number_PCR_Infected_CR, Fraction_PCR_Inf_over_ExpCR, sep = " ", remove = TRUE) %>% unite(`PCR Confirmed Infection and Seroconversion`, Number_PCR_and_Sero_Positive_CR, Fraction_Inf_PCR_and_Sero_Positive_CR, sep = " ", remove = TRUE) %>% unite(`Seroconversion by HAI : MN : Either`, Number_HAI_Positive_CR, Number_Microneut_Positive_CR, Positive_Either_HAI_or_MN_CR, sep = " : ", remove = TRUE) %>% rename('Quarantine #' = QuarantineNumber) table3_CR_manuscript_unite <- table3_CR_manuscript_unite[,c(1:3,5,4,6:8)] # Change the 4th row of the Quarantine # column to "Total" table3_CR_manuscript_unite$`Quarantine #`[4] <- "Total" # Add a column that indicates in each row the recipient classification (Control (CR)) # We need to do this in order to keep the data organized when we merge with the CR group to make 1 table 3 table3_CR_manuscript_unite$`Recipient Classification` <- "Control (CR)" SI_table3_CR_manuscript_unite <- table3_CR_manuscript_unite[,c(1,9,2:8)] #### Writing out Table 3_CR to box sync directory #### write.csv(SI_table3_CR_manuscript_unite, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_prior_sympt_Table3_CR_Manuscript.csv") #### Writing out Table 3_CR to latex for direct translation of code to table image for paper #### kable(SI_table3_CR_manuscript_unite) %>% kable_styling("striped") %>% add_header_above(c(" " = 3, "Laboratory Confirmed Infection & Illness (% of Exposed)" = 3, "Laboratory Confirmed Infection Criteria (% of Exposed)" = 3)) datatable(SI_table3_CR_manuscript_unite, filter = 'top', options = list( pageLength = 10, autoWidth = TRUE )) print(xtable(SI_table3_CR_manuscript_unite), comment = FALSE) #### Building a cumulative Table 3 and writing out to box sync directory and for translation to manuscript #### SI_table3 <- full_join(SI_table3_IR_manuscript_unite, SI_table3_CR_manuscript_unite) %>% arrange(`Quarantine #`, `Recipient Classification`) kable(SI_table3) %>% kable_styling("striped") %>% add_header_above(c(" " = 3, "Laboratory Confirmed Infection & Illness (% of Exposed)" = 3, "Laboratory Confirmed Infection Criteria (% of Exposed)" = 3))%>% kable_styling(full_width = F) %>% column_spec(1, bold = T) %>% collapse_rows(columns = 1, valign = "top") datatable(SI_table3, filter = 'top', options = list( pageLength = 10, autoWidth = TRUE )) print(xtable(SI_table3), comment = FALSE) # Table3 write.csv(SI_table3, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_prior_sympt_Table3_Manuscript.csv") #### RESULT: 2 fewer symptomatic in CR group and 1 less ILI in CR group ####