#Set working directory # # # # # # # #Install packages install.packages("dplyr") install.packages("ggplot2") install.packages("e1071") install.packages("gamlss") install.packages("MuMIn") install.packages("performance") install.packages("fitdistrplus") install.packages("lme4") install.packages("car") install.packages("agricolae") install.packages("reshape") # # # # # # # ##################################POWER ANALYSIS FUNCTION######################################## # R function to implement sample size methods in Cundill & Alexander BMC Med Res Meth # # May be used freely as long as the above paper is cited. # # Supplied in good faith but no liability implied or accepted. # # To use: 'source' this file, or simply copy/paste all the contents into R # # The following examples should reproduce entries in Table III of # Zhu & Lakkis Stat Med 2014, 33(3):376-387. # To run these, examples, the '# ' at the start of each line should be omitted. # The result is the total sample size (including both arms) # # nGLM(link="log", family="NegBinomial", mu0=2, mu1=2*0.35, k0=2, k1=2, power=0.8, method=1) # nGLM(link="log", family="NegBinomial", mu0=2, mu1=2*0.35, k0=2, k1=2, power=0.8, method=2) # nGLM(link="log", family="NegBinomial", mu0=2, mu1=2*0.50, k0=2, k1=2, power=0.8, method=1) # nGLM(link="log", family="NegBinomial", mu0=2, mu1=2*0.50, k0=2, k1=2, power=0.8, method=2) nGLM<-function(mu0, mu1, k0=NULL, k1=k0, d=1, kappa0=NULL, kappa1=kappa0, Q0=0.5, Q1=1-Q0, power, alpha=0.05, link, family, verbose=FALSE, method=2){ linkSupported<-c("log", "I", "logit") if(!is.character(link)){stop("Error: the link argument must be supplied in the character type (in quotation marks).")} if(is.na(match(link, linkSupported))){ stop(paste( "Error: link must be one of ", paste(linkSupported, collapse=', '), ".", sep=""))} familySupported<-c("binomial", "Gamma", "gaussian", "poisson", "NegBinomial") if(!is.character(family)){stop("Error: the family argument must be supplied in the character type (in quotation marks).")} if(is.na(match(family, familySupported))){ stop(paste( "Error: family must be one of ", paste(familySupported, collapse=', '), ".", sep=""))} linkFamily<-paste(link, family, sep="") linkFamilySupported<-c( "logGamma", "logpoisson", "logNegBinomial", "Ibinomial", "IGamma", "Igaussian", "Ipoisson", "INegBinomial", "logitbinomial") if(is.na(match(linkFamily, linkFamilySupported))){ stop("Error: that combination of link and family is not supported.")} linkFn<-match.fun(link) if(family=="Gamma"){ if(is.null(kappa0)){stop("Error: for Gamma family, must supply kappa parameter.")} dispersion0<-kappa0 dispersion1<-kappa1 VarFn<-function(mu, dispersion){mu*mu/dispersion} } if(family=="NegBinomial"){ dispersion0<-k0 dispersion1<-k1 VarFn<-function(mu, dispersion){mu+(mu*mu/dispersion)} } if(family=="binomial"){ dispersion0<-dispersion1<-d VarFn<-function(mu, dispersion){mu*(1-mu)/dispersion} } if(family=="poisson"){ VarFn<-function(mu, dispersion){mu} dispersion0<-dispersion1<-NULL } if(link=="log"){ dMu.dEtaFn<-function(mu){mu} InverseLinkFn<-match.fun('exp') } if(link=="I"){ dMu.dEtaFn<-function(mu){1} InverseLinkFn<-match.fun('I') } if(link=="logit"){ dMu.dEtaFn<-function(mu){mu*(1-mu)} InverseLinkFn<-match.fun('expit') } if(Q0+Q1!=1){stop('Error: Q0 and Q1 should add to 1.')} # if(muNull!='0' & muNull!='average'){ # stop("Error: the muNull argument should be '0' (to use mu0) or 'average' (to use the average, on the link function scale, of mu0 & mu1).") # } Z_alpha<-qnorm(1-(alpha/2)) Z_beta <-qnorm(power) denom<-(linkFn(mu0)-linkFn(mu1)) # if(muNull=='0'){ muNullValue<-mu0 # }else{ # muNullValue<-InverseLinkFn((Q0*linkFn(mu0))+(Q1*linkFn(mu1))) # } numerator1<-sqrt( (VarFn(muNullValue, dispersion0)/((dMu.dEtaFn(muNullValue))^2))*((1/Q1)+(1/Q0)) ) numerator2<-sqrt( ((VarFn(mu0, dispersion0)/((dMu.dEtaFn(mu0 ))^2)) /Q0) + ((VarFn(mu1, dispersion1)/((dMu.dEtaFn(mu1 ))^2)) /Q1) ) if(verbose){ print("dMu/dEta function:") print(dMu.dEtaFn) print(unlist(list("(dMu.dEtaFn(mu0))^2"=((dMu.dEtaFn(mu0))^2)))) print(unlist(list("(dMu.dEtaFn(mu1))^2"=((dMu.dEtaFn(mu1))^2)))) print("variance function:") print(VarFn) print(unlist(list("VarFn(mu0, dispersion0)"=VarFn(mu0, dispersion0)))) print(unlist(list("VarFn(mu1, dispersion1)"=VarFn(mu1, dispersion1)))) print(unlist(list(Z_alpha=Z_alpha, Z_beta=Z_beta, mu0=mu0, dispersion0=dispersion0, mu1=mu1, dispersion1=dispersion1, method=method, denom=denom, numerator1=numerator1, numerator2=numerator2))) } if(method==2){ n<-( ( (Z_alpha+Z_beta) * numerator2 )/denom)^2 }else{ n<-( ( (Z_alpha* numerator1) + (Z_beta * numerator2) )/denom )^2 } # print(linkFn(testdata)) return(n) } # # # # # # # # # # ##################################READ IN AND SET UP DATASET######################################## # # # . # # # #Read in IgA data sets IgA93_dataset = read.csv("TAND93.csv") SubsetA93 <- c(1:4) IgA93_dataset <- IgA93_dataset[,SubsetA93] head(IgA93_dataset) IgA94_dataset = read.csv("TAND94.csv") SubsetA94 <- c(1:4) IgA94_dataset <- IgA94_dataset[,SubsetA94] head(IgA94_dataset) # # # # #Combine and add new columns to Osnat93_dataset # Osnat93_dataset["eos4"] <- (Osnat93_dataset$eos4a + Osnat93_dataset$eos4b) / 2 #mean eos4 Osnat93_dataset["eosct4"] <- (Osnat93_dataset$eos4 * 5.6) #correction for count Osnat93_dataset["eos5"] <- (Osnat93_dataset$eos5a + Osnat93_dataset$eos5b) / 2 #mean eos5 Osnat93_dataset["eosct5"] <- (Osnat93_dataset$eos5 * 5.6) #correction for count # Osnat93_dataset["FEC1"] <- round((Osnat93_dataset$epg1 / 12.5), digits=0)#convert to FEC Osnat93_dataset["FEC2"] <- round((Osnat93_dataset$epg2 / 12.5), digits=0) Osnat93_dataset["FEC3"] <- round((Osnat93_dataset$epg3 / 12.5), digits=0) Osnat93_dataset["FEC4a"] <- round((Osnat93_dataset$epg4a / 12.5), digits=0) Osnat93_dataset["FEC4b"] <- round((Osnat93_dataset$epg4b / 12.5), digits=0) Osnat93_dataset["FEC5a"] <- round((Osnat93_dataset$epg5a / 12.5), digits=0) Osnat93_dataset["FEC5b"] <- round((Osnat93_dataset$epg5b / 12.5), digits=0) # Osnat93_dataset["FEC4"] <- (Osnat93_dataset$FEC4a + Osnat93_dataset$FEC4b) / 2 #mean FEC4 Osnat93_dataset["FEC5"] <- (Osnat93_dataset$FEC5a + Osnat93_dataset$FEC5b) / 2 #mean FEC5 # head(Osnat93_dataset) # # #Combine and add new columns to Osnat94_dataset # Osnat94_dataset["eos4"] <- (Osnat94_dataset$eos4a + Osnat94_dataset$eos4b) / 2 #mean eos4 Osnat94_dataset["eosct4"] <- (Osnat94_dataset$eos4 * 5.6) #correction for count Osnat94_dataset["eos5"] <- (Osnat94_dataset$eos5a + Osnat94_dataset$eos5b) / 2 #mean eos5 Osnat94_dataset["eosct5"] <- (Osnat94_dataset$eos5 * 5.6) #correction for count # Osnat94_dataset["FEC1"] <- round((Osnat94_dataset$epg1 / 12.5), digits=0)#convert to FEC Osnat94_dataset["FEC2"] <- round((Osnat94_dataset$epg2 / 12.5), digits=0) Osnat94_dataset["FEC3"] <- round((Osnat94_dataset$epg3 / 12.5), digits=0) Osnat94_dataset["FEC4"] <- round((Osnat94_dataset$epg4 / 12.5), digits=0) Osnat94_dataset["FEC5"] <- round((Osnat94_dataset$epg5 / 12.5), digits=0) # head(Osnat94_dataset) # # # # # # #Merge Osnat and IgA dataframes Osnat93_dataset <- merge(Osnat93_dataset, IgA93_dataset, all=TRUE) #merges data frames horizontally head(Osnat93_dataset) # Osnat94_dataset <- merge(Osnat94_dataset, IgA94_dataset, all= TRUE) head(Osnat94_dataset) # # # #Subset Osnat dataframes Subset_os93 <- c(1,6:7,15:26,31:35) Osnat93_dataset2 <- Osnat93_dataset[,Subset_os93] head(Osnat93_dataset2) # # Subset_os94 <- c(1:3,13:29) Osnat94_dataset2 <- Osnat94_dataset[,Subset_os94] head(Osnat94_dataset2) # # # # # # # #Merge data frames vertically Osnat_dataset <- rbind(Osnat93_dataset2,setNames(Osnat94_dataset2,names(Osnat93_dataset2))) #merges 2 dataframes on top of each other, using column names from first dataframe # # # # #Convert other variables to factor Osnat_dataset$sex <- as.factor(Osnat_dataset$sex) Osnat_dataset$birthtype <- as.factor(Osnat_dataset$birthtype) Osnat_dataset$Year <- as.factor(Osnat_dataset$Year) Osnat_dataset$field <- as.factor(Osnat_dataset$field) # # Osnat_dataset$IgA_Index <- round(Osnat_dataset$IgA_Index, digits = 3) Osnat_dataset$IgA_Index <- ifelse(Osnat_dataset$IgA_Index < 0, 0, Osnat_dataset$IgA_Index) #replace negative numbers with 0 Osnat_dataset$eosct4 <- ifelse(Osnat_dataset$eosct4 == 0, 0.0001, Osnat_dataset$eosct4) #gamma can't handle zeros in data Osnat_dataset$FEC5 <- ifelse(Osnat_dataset$FEC5 == 6.5, 6, Osnat_dataset$FEC5) #lamb b064 head(Osnat_dataset) # #re-level for GLMM Osnat_dataset$birthtype <- C(Osnat_dataset$birthtype, contr.treatment, base=1) # # # # # # # # # # # ###################################FREQUENCY DISTRIBUTION PLOT######################################## # library(ggplot2) # # #eosct5 (September) eosct5_hist <- ggplot(Osnat_dataset, aes(x=eosct5)) + geom_histogram(color="black", fill="gray", binwidth = 40) + theme_classic() + xlab("Number of Eosinophils (Count)") + ylab("Frequency") + ggtitle("eosct5 - September") + theme(axis.text.x=element_text(size=12), axis.text.y=element_text(size=12), axis.title=element_text(size=12)) eosct5_hist # # #estimate skewness coefficient # library(e1071) #Skewness: <0 = negatively/left-tailed skew, 0 = no skew, >0= positively/right-tail skew skew_eosct5_all <- skewness(Osnat_dataset$eosct5, na.rm= TRUE, type= 1) skew_eosct5_all # # # # # # # # # # # # ###################################GLM TO EXTRACT DISTRIBUTION PARAMETERS######################################## # library(gamlss) #?gamlss will take you to the documentation on the function and lists the codes for all the distribution families that can be used #gamma default link = "log", or "inverse", "identity", "own" # #Check for NAs #sapply(df, function(x) sum(is.na(x))) #We have 28 lambs with missing data for eosct5 # #Use na.omit to exclude NAs from the data before running #gamlss won't run with missing data Osnat_datasetNA <- na.omit(Osnat_dataset) # #Run eosct5 model eosct5_ml <- gamlss(eosct5 ~ 1, data= Osnat_datasetNA, family= GA(mu.link= "inverse", sigma.link= "inverse")) summary(eosct5_ml) library(MuMIn) AIC(eosct5_ml, k=2, c= TRUE) #AICc for model # #Plotting residuals hist(residuals(eosct5_ml)) #additional plots and values to assess normality #Skewness: <0 = negatively/left-tailed skew, 0 = no skew, >0= positively/right-tail skew #Kurtosis: <3= platykurtic distribution,3= Normal distribution, >3= leptokurtic distribution plot(eosct5_ml) #additional plots and values to assess normality # # # # # # # #Extract distribution parameters eosct5_k <- eosct5_ml$sigma.coefficients^2 #sigma is the square root of the usual dispersion parameter for a GLM gamma model #shape = k in Gamma distribution eosct5_k eosct5_Var <- 1 / eosct5_k eosct5_Var eosct5_th <- eosct5_Var / eosct5_ml$mu.coefficients #scale is the variance parameter = theta in Gamma distribution eosct5_th eosct5_Mu1 <- eosct5_k * eosct5_th eosct5_Mu1 eosct5_Rate <- 1 / eosct5_th eosct5_Rate # # # # # # #Produce distribution estimates for distribution and variable to compare to GLM parameters library(fitdistrplus) fitdistr(Osnat_datasetNA$eosct5, "gamma") #estimates shape and rate for gamma distribution for a given variable ## # #You can also use the function 'glm' in the stats package alongside the functions 'gamma.shape' and 'gamma.dispersion' #in the MASS package to double check your estimates # # # # # # # # # # # ##################################GLMM WITH FIXED/RANDOM EFFECTS######################################## #Run using 'glmer' function in package 'lme4' # #Start by fitting the full linear additive model, with all 2-way interactions for the fixed effects. #Use gamma family with log link for Type III Wald Chi-square test # # # #If you want to use glmer by itself, it will call na.omit by default, so don't need to explicitly call it like for gamlss library(lme4) # eosct5_full <- glmer(eosct5 ~ sex + field + birthtype + (1|sire) + (1|dam), data= Osnat_dataset, family= "Gamma"(link= "log")) # # library(car) options(contrasts = c("contr.sum", "contr.poly")) # needed for type III tests Anova(eosct5_full, type="III") # Type III tests # summary(eosct5_full) # # #marginal = the variance explained by the fixed effects only. #conditional = the variance explained by the entire model, including both fixed and random effects. r.squaredGLMM(eosct5_full) #pseudo R squared for GLMM AICc(eosct5_full) # # ##estimated marginal means library(emmeans) # emmeans(eosct5_full, specs= c("sex")) # # # # # # #diffograph of final model #perform KW first (kruskal in agricolae), then input into diffograph function library(agricolae) # Comb_sub <- subset(Osnat_dataset, select= c("birthtype", "eosct5", "age", "field", "sex", "Year")) Comb_sub <- na.omit(Comb_sub) library(dplyr) Comb_sub$birthtype <- recode_factor(Comb_sub$birthtype, "1" = "S", "2" = "T", "3" = "M") # # Comb_birthkw <- kruskal(y= Comb_sub$eosct5, trt= Comb_sub$birthtype, alpha= 0.05, p.adj= c("bonferroni"), group= FALSE) # Diff_combBirth <- diffograph(Comb_birthkw, main= "Combined Birth type", xlab= "September Eosinophil Count", cex.axis= 0.9, cex= 0.9) # # # # # # # # # # ##########################POWER ANALYSIS FOR GAMMA DISTRIBUTION - EOSINOPHILS####################### # # #Create matrix for all possible combinations of power, alpha and kappa, plus blank column for animals #?expand.grid # #Read in Power matrix.csv Power.matrix = read.csv("Power matrix_tut.csv") head(Power.matrix) # Power.matrix2 <- expand.grid("Power" = (seq(from = 0.02, to = 0.98, by = 0.01)), "Animals" = NA) head(Power.matrix2) # library(reshape) Power.matrix3 <- expand.grid.df(Power.matrix, Power.matrix2) head(Power.matrix3) # # # # #Use 'for' loop to calculate number of animals for every row of Power.matrix, using nGLM function #Assign values to blank Animals column in Power.matrix3 #Power.matrix3 for(row.i in 1:nrow(Power.matrix3)) { Animals.i <- nGLM(link="log", family="Gamma", kappa0 = Power.matrix3$k0, kappa1 = Power.matrix3$k1, mu0 = Power.matrix3$Mu0, mu1 = Power.matrix3$Mu1, power = Power.matrix3$Power, alpha=0.05, Q0=0.5, method=2, verbose=TRUE) Power.matrix3$Animals <- Animals.i} # # # #convert variables to factor for mapping Power.matrix3$Animals <- as.numeric(Power.matrix3$Animals) Power.matrix3$Animals <- round(Power.matrix3$Animals, digits = 0) # # # # # #Plot power analysis # # # eosct5_sex_plot <- ggplot(data = Power.matrix3, aes(x = Animals, y = Power, group = Year)) + geom_line(aes(linetype = Year), size=0.7) + scale_linetype_manual(values=c("dashed", "dotted", "solid")) + ylab("Power") + xlab("Total Sample Size") + theme_minimal() + theme(legend.title= element_blank(), legend.position= "top", axis.line=element_line(colour="black"), panel.spacing=unit(2, "lines")) + theme(text= element_text(colour= "black"), (axis.ticks.length = element_line(size=0.2))) + scale_x_continuous(expand= c(0.01,0), limits= c(0,5000),minor_breaks = seq(0,5000,250), breaks=seq(0,5000, by=500)) + scale_y_continuous(expand= c(0,0), limits= c(0,1.05),breaks=seq(0,1.05, by=.1)) + labs(colour= "Comparison") + theme(axis.text.x=element_text(angle=30)) + #theme(legend.text=element_text(size=5)) + theme(plot.margin= unit(c(1,2,1,1), "lines")) + theme(axis.text.x=element_text(size=8, colour="black"), axis.text.y=element_text(size=8, colour= "black"), axis.title=element_text(size=10)) + theme(text= element_text(colour= "black")) + coord_cartesian (clip= "off") + theme(panel.grid.minor = element_line(size = 0.2), panel.grid.major = element_line(size = 0.5)) eosct5_sex_plot # # # # #