--- title: "Simulation Study with cIRT" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Simulation Study with cIRT} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- # Intro The document addresses the simulation study that was performed to understand whether `cIRT` was able to effectively recover parameters under the proposed model. There are three sections to this document. The first section details the simulation setup and implementation. The second section deals with how we obtained overall measurements for each model realization. The third and final section depicts the output structure we used to create the tables within the publication. # Simulation The simulation study below is configured to generate results one might obtain from a pool of 1,000 subjects taking a 20 item test. We obtain summarize the results obtained from running the model 100 times over slightly differing $\theta$ and $\eta$. ```{r sim_setup, eval = F} ### Variables # Y = trial matix # C = KN vector of binary choices # N = #of subjects # J = # of items # K = # of choices # atrue = true item discriminations # btrue = true item locations # thetatrue = true thetas/latent performance # gamma = fixed effects coefficients # Sig = random-effects variance-covariance # subid = id variable for subjects # install mvtnorm is necessary # install.packages("mvtnorm") # Load the Library library(cIRT) # Simulate 2PNO Data N = 1000 # Students J = 20 # Total numbers of possible items per SA # Set a seed for the random generation of a's and b's. set.seed(1337) # Randomly pick a's and b's # Generate as, bs atrue=runif(J)+1 btrue=2*runif(J)-1 save(atrue, btrue, file="a_b_true.rda") # 2 Level Probit Data K = 30 gam_notheta = c(.5,1) gam_theta = c(3,.25) gamma = c(gam_notheta,gam_theta) Sig = matrix(c(.25,0,0,.125),2,2) # Number of replications B = 100 # Begin the Simulation Study for(b in 1:B){ # Provide user with state information cat(paste0("On Iteration:",b,"\n")) set.seed(b + 1234) # True theta and etay thetatrue = rnorm(N) etay = outer(rep(1,N),atrue) * thetatrue - outer(rep(1,N),btrue) # Generate Y for 2PNO model p.correct = pnorm(etay) Y = matrix(rbinom(N*J, 1, p.correct),N,J) ################################################# # Simulating 2 level probit data ################################################# subid = expand.grid(cid = 1:K,sid = 1:N)[,2] pred = rnorm(K*N,0,1) # Pred center_pred = center_matrix(as.matrix(pred)) Xnotheta = cbind(1,center_pred) Xtheta = rep(thetatrue,each=K)*Xnotheta X = cbind(Xnotheta,Xtheta) zetas = mvtnorm::rmvnorm(N,mean=c(0,0),sigma=Sig) # mvtnorm environment accessed W_veczeta = apply(Xnotheta*zetas[rep(1:N,each=K),],1,sum) etac = X%*%gamma + W_veczeta Zc = rnorm(N*K,mean=etac,sd=1) C = 1*(Zc>0) # Run the Choice Item Response Model out1 = cIRT(subid, Xnotheta, c(1,2), Xnotheta, Y, C, 5000) mname = paste0("model_",b) # Assign the data set name assign(mname, out1) # Save the out object save(list=mname, file=paste0(mname,".rda")) # Clean up Export rm(list = c(mname,"out1")) } ``` # Simulation Results After we obtain the 100 different models, we will need to take averages over each models chain and then obtain an overall average. To do so, we used the following script: ```{r sim_results, eval = F} # E[theta] - theta bias = function(theta.star, theta.true){ matrix(mean(theta.star) - theta.true,ncol=1) } bias2 = function(theta.star, theta.true){ matrix(theta.star - theta.true,ncol=1) } # sqrt ( 1/n * sum( (y_i - y.hat_i)^2 ) RMSE = function(y,y.hat){ sqrt( (y-y.hat)^2 ) } # Change true values if needed # True Values gam_notheta = c(.5,1) gam_theta = c(3,.25) gamma = c(gam_notheta,gam_theta) # Loads a and b values load("a_b_true.rda") Sig = as.numeric(matrix(c(.25,0,0,.125),2,2))[c(1,2,4)] B = 100 # Storage to hold bootstrap replications a_result = matrix(0, B, 20) b_result = matrix(0, B, 20) gs0_result = matrix(0, B, 2) beta_result = matrix(0, B, 2) sig_result = array(NA, dim=c(2,2,B)) for(b in 1:B){ mname = paste0("model_",b) load(paste0(mname, ".rda")) d = get(mname) a_result[i,] = apply(d$as, 2, FUN = mean) b_result[i,] = apply(d$bs, 2, FUN = mean) gs0_result[i,] = apply(d$gs0, 2, FUN = mean) beta_result[i,] = apply(d$betas, 2, FUN = mean) sig_result[,,i] = solve(apply(d$Sigma_zeta_inv, c(1,2), FUN = mean)) } # Obtain an overall mean for each of the following: m_a_result = apply(a_result, 2, FUN = mean) m_b_result = apply(b_result, 2, FUN = mean) m_gs0_result = apply(gs0_result, 2, FUN = mean) m_beta_result = apply(beta_result, 2, FUN = mean) m_sig_result = as.numeric(apply(sig_result, c(1,2), FUN = mean))[c(1,2,4)] # Perform a bias evaluation given the true values: a_bias = bias2(m_a_result,atrue) b_bias = bias2(m_b_result,btrue) gs0_bias = bias2(m_gs0_result,gamma[1:2]) beta_bias = bias2(m_beta_result,gamma[3:4]) sig_bias = bias2(m_sig_result, Sig) # Perform the RMSE under the supplied results: a_RMSE = RMSE(m_a_result,atrue) b_RMSE = RMSE(m_b_result,btrue) gs0_RMSE = RMSE(m_gs0_result,gamma[1:2]) beta_RMSE = RMSE(m_beta_result,gamma[3:4]) sig_RMSE = RMSE(m_sig_result, Sig) ``` # Exporting Results for Publication After we obtain the different results, we opted to write export code to format the data in a way that the `xtable` could process and export it. ```{r out, eval = F} # Make a results export results_a = cbind(m_a_result, atrue, a_bias, a_RMSE) rownames(results_a) = paste0("Item ",1:length(atrue)) colnames(results_a) = c("$a$ Estimate", "$a$ True", "$a$ Bias", "$a$ RMSE") results_b = cbind(m_b_result, btrue, b_bias, b_RMSE) rownames(results_b) = paste0("Item ",1:length(btrue)) colnames(results_b) = c("$b$ Estimate", "$b$ True", "$b$ Bias", "$b$ RMSE") results_gs0 = cbind(m_gs0_result,gamma[1:2],gs0_bias,gs0_RMSE) rownames(results_gs0) = paste0("$\\gamma_",1:2,"$") colnames(results_gs0) = c("$\\beta$ Estimate", "$\\beta$ True", "$\\beta$ Bias", "$\\beta$ RMSE") results_beta = cbind(m_beta_result,gamma[3:4],beta_bias,beta_RMSE) rownames(results_beta) = paste0("$\\beta_",1:2,"$") colnames(results_beta) = c("$\\beta$ Estimate", "$\\beta$ True", "$\\beta$ Bias", "$\\beta$ RMSE") results_sig_zeta = cbind(m_sig_result,Sig,sig_bias,sig_RMSE) rownames(results_sig_zeta) = c("$\\Zeta_{1,1}$","$\\Zeta_{2,1} = $\\Zeta_{1,2}$", "$\\Zeta_{2,2}$") colnames(results_sig_zeta) = c("$\\Sigma_{\\Zeta}$ Estimate", "$\\beta$ True", "$\\beta$ Bias", "$\\beta$ RMSE") save(results_a, results_b, results_gs0, results_beta, results_sig_zeta, file="sim_results.rda") ```