## Genomic Prediciton Workshop ## ## Bayesian example ## ## ZL 29-Sept-2020 ## ## zibei.lin@agriculture.vic.gov.au ## ## Set up path for saving R-packages ## .libPaths( 'C:/Data/work/teaching_Sept2020/Rpackage' ) ## Download packages ## # install.packages( 'sommer' ) # install.packages( 'BGLR' ) ## Load packages ## library( 'sommer' ) library( 'BGLR' ) #################################################################################### ## The preparations of 'Phenotypes' and 'Genotypes' repeat the ones in GBLUP example ## ## Load CIMMYT data from sommer ## data( DT_wheat ) ## Phenotypes ## pheno <- DT_wheat ## Save phenotypes in an object called ‘pheno’ pheno <- as.data.frame( pheno ) ## Change the class from ‘matrix’ to ‘data.frame’ colnames( pheno ) <- paste0( 'GrainYield_Env' , 1:4 ) ## Add column names pheno$LineID <- 1:599 ## Add a column of Line ID ## Genotypes ## geno <- GT_wheat ## Save genotypes in an object called ‘geno’ rownames( geno ) <- pheno$LineID ## Add LineID for genotypes ## Target trait: GrainYield_Env2 ## LineIDs 1- 500 as training (phenotypes and genotypes) ## LineIDs 501-599 as validation (genotypes) pheno_GY2_complete <- pheno[ , c( 2, 5 )] ## Pull out columns 2 and 5 from pheno, 'GrainYield_Env2' and 'LineID' pheno_GY2_forTest <- pheno_GY2_complete ## Make a copy for phenotypes pheno_GY2_forTest[ 501:599 , 'GrainYield_Env2' ] <- NA ## Pretend IDs 501-599 do NOT have phenotypes #################################################################################### ## The following three objects in memory if finished GBLUP demo ## #geno #pheno_GY2_complete #pheno_GY2_forTest ## Gemomic prediction - BayesB - Demo ## ## Translate y = µ1n + Xb + e model_term <- list( list( X = geno, model = 'BayesB' ) ) ## Prepare a list 'model_term' for model training ## Setting random seed in R, making results repeatable ## ## Bayesian model, initialize effects for markers by 'guess' ## Different results if giving different random seeds set.seed( 12345 ) ## a positive integer n_iter=1000 ## Set up the number of iterations n_burnIn=500 ## Set up the number of burn-in ans_BayesB <- BGLR( y = pheno_GY2_forTest[,1] , ETA = model_term, nIter = n_iter, burnIn = n_burnIn) ## Interpretation for each part in BGLR() ## Input data (phenotypes) ## model term ## number of iterations ## number of burn-in ## Results ## marker_effects <- ans_BayesB$ETA[[1]]$b ## pull out marker effects head( marker_effects ) ## Have a look geno[ 1:5, 1:5 ] ############################################################### ## Toy - how to calculate GEBVs using marker effects ## ## Assume having 3 individuals,3 markers and marker effects ## geno_toy <- data.frame(m1=rep(1,3), m2=rep(0,3), m3=rep(-1,3)) rownames(geno_toy) <- paste0('ID', 1:3) geno_toy <- as.matrix(geno_toy) marker_effects_toy <- c(0.1, 0.2, 0.3) geno_toy ## have a look at the toys marker_effects_toy ## have a look at the toys geno_toy %*% marker_effects_toy ## element-wise multiplication ################################################################ gebvs_BayesB <- geno %*% marker_effects ## calculate GEBVs, element-wise multiplication head( gebvs_BayesB ) tail( gebvs_BayesB ) ## Pearson correlation between the estimated breeding values and the phenotypes for the validation set cor( gebvs_BayesB[501:599] , pheno_GY2_complete[ 501:599 , 1 ] ) ## Correlation of genomic estimated breeding values bewteen GBLUP and BayesB ## gebvs_GBLUP from the GBLUP example cor( gebvs_GBLUP , gebvs_BayesB )