"""IRT 1PL (Rasch Model) Maximum Likelihood Estimation."""
|
|
import numpy as np
|
|
from scipy.optimize import minimize_scalar, minimize
|
|
|
|
|
|
def estimate_theta(responses, b_params):
    """
    Estimate student ability theta using MLE for the 1PL (Rasch) IRT model.

    Parameters
    ----------
    responses : list or array
        Binary responses [0, 1, 1, 0, ...].
    b_params : list or array
        Item difficulty parameters [b1, b2, b3, ...].

    Returns
    -------
    float
        Estimated theta (ability). Returns 0.0 for empty input or if the
        optimizer fails, +4.0 for a perfect score and -4.0 for an
        all-incorrect score (the finite MLE does not exist in those cases).

    Raises
    ------
    ValueError
        If responses and b_params have different lengths.
    """
    responses = np.asarray(responses, dtype=float)
    b_params = np.asarray(b_params, dtype=float)

    # Validate lengths before the empty-input shortcut so a mismatch
    # (e.g. empty responses vs. non-empty b_params) raises instead of
    # being silently reported as theta = 0.0.
    if len(responses) != len(b_params):
        raise ValueError("responses and b_params must have same length")

    # Edge case: nothing to estimate from.
    if len(responses) == 0:
        return 0.0

    n = len(responses)
    sum_resp = np.sum(responses)

    # Perfect / zero scores push the MLE to +/- infinity; clamp instead.
    if sum_resp == n:
        return 4.0
    if sum_resp == 0:
        return -4.0

    def neg_log_likelihood(theta):
        """Negative log-likelihood of the responses given ability theta."""
        exponent = np.clip(theta - b_params, -30, 30)  # numerical stability
        p = 1.0 / (1.0 + np.exp(-exponent))
        p = np.clip(p, 1e-10, 1 - 1e-10)  # avoid log(0)
        ll = np.sum(responses * np.log(p) + (1 - responses) * np.log(1 - p))
        return -ll

    result = minimize_scalar(neg_log_likelihood, bounds=(-6, 6), method='bounded')
    return float(result.x) if result.success else 0.0
|
|
|
|
|
|
def estimate_b(responses_matrix):
    """
    Estimate item difficulties by joint maximum likelihood for the 1PL
    (Rasch) IRT model, alternating between ability and difficulty updates.

    Parameters
    ----------
    responses_matrix : 2D array
        Response matrix where rows = students, cols = items; entries 0 or 1.

    Returns
    -------
    numpy.ndarray
        Estimated b parameter for each item. An empty input yields a
        zero-length array; a matrix with no students yields zeros for
        every item (no information to update the default).

    Raises
    ------
    ValueError
        If responses_matrix is not 2-dimensional.
    """
    responses_matrix = np.asarray(responses_matrix, dtype=float)

    # Completely empty input such as [] (not even a 2-D shape): no items.
    if responses_matrix.size == 0 and responses_matrix.ndim < 2:
        return np.array([])

    if responses_matrix.ndim != 2:
        raise ValueError("responses_matrix must be 2-dimensional")

    n_students, n_items = responses_matrix.shape

    # Checked via shape rather than .size so a (0, k) matrix returns
    # k zeros (one default difficulty per item) instead of an empty array.
    if n_students == 0 or n_items == 0:
        return np.zeros(n_items)

    theta = np.zeros(n_students)
    b = np.zeros(n_items)

    def _bernoulli_neg_ll(resp, exponent):
        """Negative Bernoulli log-likelihood for responses at given logits."""
        exponent = np.clip(exponent, -30, 30)  # numerical stability
        p = np.clip(1.0 / (1.0 + np.exp(-exponent)), 1e-10, 1 - 1e-10)
        return -np.sum(resp * np.log(p) + (1 - resp) * np.log(1 - p))

    # Alternating joint-MLE iterations (coordinate ascent, not true EM).
    for _ in range(20):
        # Update theta for each student given current item difficulties.
        for i in range(n_students):
            resp_i = responses_matrix[i, :]
            sum_resp = np.sum(resp_i)

            if sum_resp == n_items:
                theta[i] = 4.0   # perfect score: finite MLE does not exist
            elif sum_resp == 0:
                theta[i] = -4.0  # zero score: finite MLE does not exist
            else:
                res = minimize_scalar(
                    lambda t, r=resp_i: _bernoulli_neg_ll(r, t - b),
                    bounds=(-6, 6), method='bounded')
                theta[i] = res.x if res.success else 0.0

        # Update b for each item given current abilities.
        for j in range(n_items):
            resp_j = responses_matrix[:, j]
            sum_resp = np.sum(resp_j)

            if sum_resp == n_students:
                b[j] = -4.0  # everyone correct: very easy item, clamp
            elif sum_resp == 0:
                b[j] = 4.0   # everyone incorrect: very hard item, clamp
            else:
                res = minimize_scalar(
                    lambda bj, r=resp_j: _bernoulli_neg_ll(r, theta - bj),
                    bounds=(-6, 6), method='bounded')
                b[j] = res.x if res.success else 0.0

    return b
|