Files
yellow-bank-soal/irt_1pl_mle.py
Dwindi Ramadhana cf193d7ea0 first commit
2026-03-21 23:32:59 +07:00

136 lines
4.2 KiB
Python

"""
IRT 1PL (Rasch Model) Maximum Likelihood Estimation
"""
import numpy as np
from scipy.optimize import minimize_scalar, minimize
def estimate_theta(responses, b_params):
    """
    Estimate student ability theta via maximum likelihood for the 1PL (Rasch) model.

    Parameters
    ----------
    responses : list or array
        Binary responses [0, 1, 1, 0, ...].
    b_params : list or array
        Item difficulty parameters [b1, b2, b3, ...].

    Returns
    -------
    float
        Estimated theta. Edge cases: 0.0 for empty input or optimizer
        failure; +4.0 when all responses are correct and -4.0 when all
        are incorrect (a finite MLE does not exist for perfect scores).

    Raises
    ------
    ValueError
        If responses and b_params are both nonempty but differ in length.
    """
    responses = np.asarray(responses, dtype=float)
    b_params = np.asarray(b_params, dtype=float)

    # Edge case: no data -- no information about theta, return 0.0.
    if len(responses) == 0 or len(b_params) == 0:
        return 0.0
    if len(responses) != len(b_params):
        raise ValueError("responses and b_params must have same length")

    n = len(responses)
    sum_resp = np.sum(responses)
    # Perfect scores have no finite MLE; return a conventional bound.
    if sum_resp == n:
        return 4.0
    if sum_resp == 0:
        return -4.0

    def neg_log_likelihood(theta):
        """Negative Bernoulli log-likelihood of the responses at ability theta."""
        # Clip the logit for numerical stability before exponentiating.
        exponent = np.clip(theta - b_params, -30, 30)
        p = 1.0 / (1.0 + np.exp(-exponent))
        p = np.clip(p, 1e-10, 1 - 1e-10)  # avoid log(0)
        return -np.sum(responses * np.log(p) + (1 - responses) * np.log(1 - p))

    # The 1PL log-likelihood in theta is unimodal, so a bounded scalar
    # search is sufficient.
    result = minimize_scalar(neg_log_likelihood, bounds=(-6, 6), method='bounded')
    if result.success:
        return float(result.x)
    return 0.0
def estimate_b(responses_matrix):
    """
    Estimate item difficulties via joint maximum likelihood for the 1PL model.

    Abilities (theta) and difficulties (b) are estimated by alternating
    conditional maximization: with b fixed, each student's theta is
    maximized by a bounded 1-D search; with theta fixed, each item's b
    is maximized the same way. A fixed number of sweeps is run.

    Parameters
    ----------
    responses_matrix : 2D array
        Response matrix, rows = students, cols = items, entries 0 or 1.

    Returns
    -------
    numpy.ndarray
        Estimated b for each item. Items everyone answered correctly get
        b = -4.0 and items nobody answered correctly get b = 4.0 (no
        finite MLE exists in those cases). An empty matrix yields one
        zero per item (or an empty array when the item count is unknown).

    Raises
    ------
    ValueError
        If a nonempty responses_matrix is not 2-dimensional.
    """
    responses_matrix = np.asarray(responses_matrix, dtype=float)

    # Edge case: no observations. Preserve one b per item when the item
    # count is known. (This fixes a previously unreachable branch: the
    # old size==0 check ran first, so a (0, k) matrix returned an empty
    # array instead of k zeros.)
    if responses_matrix.size == 0:
        if responses_matrix.ndim == 2:
            return np.zeros(responses_matrix.shape[1])
        return np.array([])
    if responses_matrix.ndim != 2:
        raise ValueError("responses_matrix must be 2-dimensional")

    n_students, n_items = responses_matrix.shape
    theta = np.zeros(n_students)
    b = np.zeros(n_items)

    def _neg_ll(resp, logits):
        """Negative Bernoulli log-likelihood of resp at the given logits."""
        p = 1.0 / (1.0 + np.exp(-np.clip(logits, -30, 30)))
        p = np.clip(p, 1e-10, 1 - 1e-10)  # avoid log(0)
        return -np.sum(resp * np.log(p) + (1 - resp) * np.log(1 - p))

    # Alternating joint-ML sweeps (not EM, as a previous comment claimed);
    # 20 passes is adequate for practical convergence on typical tests.
    for _ in range(20):
        # Update theta for each student, holding b fixed.
        for i in range(n_students):
            resp_i = responses_matrix[i, :]
            sum_resp = np.sum(resp_i)
            if sum_resp == n_items:
                theta[i] = 4.0   # perfect score: no finite MLE
            elif sum_resp == 0:
                theta[i] = -4.0  # zero score: no finite MLE
            else:
                res = minimize_scalar(lambda t, r=resp_i: _neg_ll(r, t - b),
                                      bounds=(-6, 6), method='bounded')
                theta[i] = res.x if res.success else 0.0
        # Update b for each item, holding theta fixed.
        for j in range(n_items):
            resp_j = responses_matrix[:, j]
            sum_resp = np.sum(resp_j)
            if sum_resp == n_students:
                b[j] = -4.0  # everyone correct: maximally easy item
            elif sum_resp == 0:
                b[j] = 4.0   # nobody correct: maximally hard item
            else:
                res = minimize_scalar(lambda bj, r=resp_j: _neg_ll(r, theta - bj),
                                      bounds=(-6, 6), method='bounded')
                b[j] = res.x if res.success else 0.0
    return b