Overview of simcdm

Overview

In this document, we highlight the features of the simcdm package for simulating cognitive diagnostic modeling data.

Notation

For consistency, we aim to use the following notation.

Denoting individuals:

  • N is the total number of individuals taking the assessment.
  • i is the current individual.

Denoting items:

  • J is the total number of items on the assessment.
  • j is the current item.
  • $Y_{ij}$ is the observed binary response for individual i (1 ≤ i ≤ N) to item j (1 ≤ j ≤ J).
  • $s_j$ is the probability of slipping on item j.
  • $g_j$ is the probability of guessing on item j.

Denoting attributes:

  • K is the total number of attributes for the assessment.
  • k is the current attribute.
  • $\alpha_i = (\alpha_{i1}, \ldots, \alpha_{iK})$ where $\alpha_i \in \{0, 1\}^K$ and $\alpha_{ik}$ is the latent binary attribute for individual i on attribute k (1 ≤ k ≤ K).

Denoting the skill/attribute “Q” matrix:

  • $q_j = (q_{j1}, \ldots, q_{jK})$ is the jth row of Q, where $q_{jk} = 1$ if attribute k is required for item j and $q_{jk} = 0$ otherwise.

Usage

To use simcdm, please load the package.

library(simcdm)

Matrix Simulation

Simulations within this section are done under the following settings.

# Fix the random number generator state so results can be reproduced
set.seed(888)

# Dimensions shared by the simulations in this section
N <- 15   # examinees / subjects
J <- 10   # items
K <- 2    # skills / attributes

Identifiable Q Matrix Simulation

Simulate an identifiable Q matrix (J items by K skills).

# Fix the random number generator state so the draw can be reproduced
set.seed(1512)

# Draw an identifiable Q matrix (J items by K skills) and display it
Q <- sim_q_matrix(J, K)
Q
##       [,1] [,2]
##  [1,]    1    0
##  [2,]    1    0
##  [3,]    0    1
##  [4,]    0    1
##  [5,]    0    1
##  [6,]    0    1
##  [7,]    1    1
##  [8,]    1    0
##  [9,]    1    0
## [10,]    0    1

η Matrix Simulation

Create the ideal response matrix for each trait (J items by $2^K$ latent classes).

# Fix the random number generator state so the result can be reproduced
set.seed(4421)

# Build the eta matrix from the Q matrix and display it
eta <- sim_eta_matrix(K, J, Q)
eta
##       [,1] [,2] [,3] [,4]
##  [1,]    0    0    1    1
##  [2,]    0    0    1    1
##  [3,]    0    1    0    1
##  [4,]    0    1    0    1
##  [5,]    0    1    0    1
##  [6,]    0    1    0    1
##  [7,]    0    0    0    1
##  [8,]    0    0    1    1
##  [9,]    0    0    1    1
## [10,]    0    1    0    1

Attribute profile simulation

Generate latent attribute profile classes ($2^K$ latent classes by K skills).

# Enumerate every attribute class for K skills and display the listing
class_alphas <- attribute_classes(K)
class_alphas
##      [,1] [,2]
## [1,]    0    0
## [2,]    0    1
## [3,]    1    0
## [4,]    1    1

Generate latent attribute profile class for each subject (N subjects by K skills).

# Fix the random number generator state so the draw can be reproduced
set.seed(5126)

# Draw an attribute profile for each of the N subjects and display it
subject_alphas <- sim_subject_attributes(N, K)
subject_alphas
##       [,1] [,2]
##  [1,]    0    0
##  [2,]    1    1
##  [3,]    1    0
##  [4,]    1    1
##  [5,]    0    0
##  [6,]    0    1
##  [7,]    0    1
##  [8,]    1    0
##  [9,]    0    1
## [10,]    0    1
## [11,]    0    1
## [12,]    0    1
## [13,]    1    0
## [14,]    0    0
## [15,]    1    0
# Equivalent to:
# subject_alphas = class_alphas[sample(2 ^ K, N, replace = TRUE),]

DINA Simulation

Simulations within this section are done under the following settings.

# Fix the random number generator state so results can be reproduced
set.seed(888)

# Dimensions of the simulated assessment
N <- 15   # examinees / subjects
J <- 10   # items
K <- 2    # skills / attributes

# Every item shares the same guessing and slipping value
gs <- rep(0.2, J)
ss <- gs

# Draw an identifiable Q matrix
Q <- sim_q_matrix(J, K)

# Draw an attribute profile for each subject
subject_alphas <- sim_subject_attributes(N, K)

DINA Item Simulation

Simulate item data, Y, under the DINA model (N subjects by J items).

# Fix the random number generator state so the responses can be reproduced
set.seed(2019)

# Generate DINA item responses from the subject attributes, the Q matrix,
# and the per-item slipping (ss) and guessing (gs) values, then display them
items_dina <- sim_dina_items(subject_alphas, Q, ss, gs)
items_dina
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    0    0    0    1    1    0    0    0    1     1
##  [2,]    1    1    1    0    1    1    1    1    0     1
##  [3,]    0    0    0    0    1    0    0    0    1     1
##  [4,]    1    1    1    0    0    0    1    1    1     0
##  [5,]    1    1    1    1    1    1    1    1    1     1
##  [6,]    1    1    1    1    0    1    0    1    0     1
##  [7,]    0    0    0    1    1    0    0    0    0     1
##  [8,]    1    0    1    1    1    0    0    1    0     1
##  [9,]    1    1    1    1    0    1    1    1    1     1
## [10,]    0    1    0    1    1    0    0    0    1     1
## [11,]    1    1    1    1    1    1    1    0    1     1
## [12,]    0    0    0    0    1    0    0    0    1     0
## [13,]    0    0    0    0    1    0    0    0    1     1
## [14,]    1    0    0    1    1    0    0    0    0     0
## [15,]    1    1    0    1    0    0    1    1    0     0

DINA Attribute Simulation

Simulate attribute data under the DINA model (N subjects by J items).

# Fix the random number generator state so the result can be reproduced
set.seed(51823)

# Generate DINA attribute data from the subject attributes and the Q matrix,
# then display it
attributes <- sim_dina_attributes(subject_alphas, Q)
attributes
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    0    0    0    1    1    0    0    0    1     1
##  [2,]    1    1    1    1    1    1    1    1    1     1
##  [3,]    0    0    0    1    1    0    0    0    1     1
##  [4,]    1    1    1    0    0    0    1    1    0     0
##  [5,]    1    1    1    1    1    1    1    1    1     1
##  [6,]    1    1    1    1    1    1    1    1    1     1
##  [7,]    0    0    0    1    1    0    0    0    1     1
##  [8,]    1    1    1    1    1    1    1    1    1     1
##  [9,]    1    1    1    1    1    1    1    1    1     1
## [10,]    0    0    0    1    1    0    0    0    1     1
## [11,]    1    1    1    1    1    1    1    1    1     1
## [12,]    0    0    0    1    1    0    0    0    1     1
## [13,]    0    0    0    1    1    0    0    0    1     1
## [14,]    0    0    0    0    0    0    0    0    0     0
## [15,]    1    1    1    0    0    0    1    1    0     0

rRUM Simulation

The rRUM simulations are done using the following settings.

# Fix the random number generator state so results can be reproduced
set.seed(888)

# Dimensions of the simulated assessment
N <- 15   # examinees / subjects
J <- 10   # items
K <- 2    # skills / attributes

# Per-item probability of a correct answer for individuals who
# do not lack any required attribute
pistar <- rep(0.9, J)

# Draw an identifiable Q matrix
Q <- sim_q_matrix(J, K)

# Penalty applied for each required attribute a subject lacks
rstar <- 0.5 * Q

# Probability of belonging to each latent class
pis <- c(0.1, 0.2, 0.3, 0.4)

# Draw latent attribute profiles using the custom class probabilities
# (N subjects by K skills)
subject_alphas <- sim_subject_attributes(N, K, prob = pis)

# The call above is equivalent to:
# class_alphas = attribute_classes(K)
# subject_alphas = class_alphas[sample(2 ^ K, N, replace = TRUE, prob = pis),]

Simulate rRUM items

Simulate rRUM item data Y (N by J)

# Fix the random number generator state so the responses can be reproduced
set.seed(912)

# Generate rRUM item responses from the Q matrix, the penalties (rstar),
# the per-item probabilities (pistar) and the subject attributes,
# then display them
rrum_items <- sim_rrum_items(Q, rstar, pistar, subject_alphas)
rrum_items
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    1    1    1    0    1    0    1    1    1     0
##  [2,]    1    1    0    1    1    0    1    0    0     0
##  [3,]    0    1    1    1    0    1    1    1    0     0
##  [4,]    0    1    1    1    0    1    1    1    1     0
##  [5,]    1    1    0    0    1    0    1    1    1     0
##  [6,]    1    1    1    1    0    0    1    1    0     1
##  [7,]    1    0    1    0    1    0    0    0    0     1
##  [8,]    0    0    1    0    0    0    1    0    0     1
##  [9,]    0    1    0    1    1    1    0    1    1     0
## [10,]    1    1    1    1    0    0    0    1    1     0
## [11,]    1    1    1    0    0    0    1    1    0     1
## [12,]    0    1    0    1    1    0    1    1    1     1
## [13,]    1    0    1    1    1    0    1    1    1     1
## [14,]    1    0    1    1    1    1    1    1    1     1
## [15,]    0    0    1    0    1    1    1    1    1     1