386 lines
10 KiB
Python
386 lines
10 KiB
Python
"""
|
||
CTT (Classical Test Theory) Scoring Engine.
|
||
|
||
Implements exact Excel formulas for:
|
||
- p-value (Tingkat Kesukaran): p = Σ Benar / Total Peserta
|
||
- Bobot (Weight): Bobot = 1 - p
|
||
- NM (Nilai Mentah): NM = (Total_Bobot_Siswa / Total_Bobot_Max) × 1000
|
||
- NN (Nilai Nasional): NN = 500 + 100 × ((NM - Rataan) / SB)
|
||
|
||
All formulas match PRD Section 13.1 exactly.
|
||
"""
|
||
|
||
import math
from datetime import datetime, timezone
from typing import Optional

from sqlalchemy import Integer, cast, func, select
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.item import Item
from app.models.tryout_stats import TryoutStats
from app.models.user_answer import UserAnswer
|
||
|
||
|
||
def calculate_ctt_p(total_correct: int, total_participants: int) -> float:
    """
    Return the CTT difficulty index (p-value) of an item.

    The p-value is the share of participants who answered correctly:
    p = total_correct / total_participants.

    Args:
        total_correct: How many participants answered the item correctly.
        total_participants: How many participants answered the item at all.

    Returns:
        The proportion correct, bounded to [0.0, 1.0].

    Raises:
        ValueError: If total_participants is not positive, if total_correct
            is negative, or if total_correct is larger than
            total_participants.
    """
    # The participant count is checked first so the division below can
    # never hit a zero denominator.
    if total_participants <= 0:
        raise ValueError("total_participants must be greater than 0")
    if total_correct < 0:
        raise ValueError("total_correct cannot be negative")
    if total_correct > total_participants:
        raise ValueError("total_correct cannot exceed total_participants")

    proportion = total_correct / total_participants

    # The checks above already keep the ratio inside [0, 1]; the bounds are
    # applied anyway as a final safeguard.
    upper_bounded = min(1.0, proportion)
    return max(0.0, upper_bounded)
|
||
|
||
|
||
def calculate_ctt_bobot(p_value: float) -> float:
    """
    Derive the CTT item weight (bobot) from the item's p-value.

    The weight is the complement of the p-value: bobot = 1 - p. Items that
    most participants get right therefore carry little weight, and items
    that few participants get right carry a lot:

    - p > 0.70 (easy)            -> bobot < 0.30
    - 0.30 <= p <= 0.70 (medium) -> bobot between 0.30 and 0.70
    - p < 0.30 (difficult)       -> bobot > 0.70

    Args:
        p_value: CTT p-value, expected in [0.0, 1.0].

    Returns:
        The weight, bounded to [0.0, 1.0].

    Raises:
        ValueError: If p_value is not inside [0, 1].
    """
    # A chained comparison is used on purpose: it is False for NaN as well,
    # so NaN is rejected together with out-of-range numbers.
    within_unit_interval = 0.0 <= p_value <= 1.0
    if not within_unit_interval:
        raise ValueError(f"p_value must be in range [0, 1], got {p_value}")

    weight = 1.0 - p_value

    # Bound the result to [0, 1] as a final safeguard.
    upper_bounded = min(1.0, weight)
    return max(0.0, upper_bounded)
|
||
|
||
|
||
def calculate_ctt_nm(total_bobot_siswa: float, total_bobot_max: float) -> int:
    """
    Compute the CTT raw score NM (Nilai Mentah) on a 0-1000 scale.

    NM = (total_bobot_siswa / total_bobot_max) * 1000, where
    total_bobot_siswa is the sum of the weights of the items the student
    answered correctly and total_bobot_max is the sum of the weights of all
    items (the SUMPRODUCT-style calculation of the reference spreadsheet).

    Args:
        total_bobot_siswa: Total weight earned by the student.
        total_bobot_max: Maximum attainable weight (sum of all item weights).

    Returns:
        NM as an integer bounded to [0, 1000]. An earned weight larger than
        the maximum is capped at 1000 rather than rejected.

    Raises:
        ValueError: If total_bobot_max is not positive or total_bobot_siswa
            is negative.
    """
    if total_bobot_max <= 0:
        raise ValueError("total_bobot_max must be greater than 0")
    if total_bobot_siswa < 0:
        raise ValueError("total_bobot_siswa cannot be negative")

    scaled = (total_bobot_siswa / total_bobot_max) * 1000

    # NOTE: the built-in round() rounds exact halves to the nearest even
    # integer (e.g. 0.5 -> 0, 1.5 -> 2).
    rounded = round(scaled)
    upper_bounded = min(1000, rounded)
    return max(0, upper_bounded)
|
||
|
||
|
||
def calculate_ctt_nn(nm: int, rataan: float, sb: float) -> int:
    """
    Compute the CTT normalized score NN (Nilai Nasional).

    NN = 500 + 100 * ((nm - rataan) / sb), i.e. the raw score is expressed
    as a z-score and rescaled to a distribution with mean 500 and standard
    deviation 100.

    Args:
        nm: Raw score (Nilai Mentah), must be in [0, 1000].
        rataan: Mean of the NM scores.
        sb: Standard deviation of the NM scores (Simpangan Baku).

    Returns:
        NN as an integer bounded to [0, 1000]. When sb is zero or negative
        (for example when all scores are identical) no z-score can be
        computed and the neutral score 500 is returned.

    Raises:
        ValueError: If nm is outside [0, 1000].
    """
    nm_in_range = 0 <= nm <= 1000
    if not nm_in_range:
        raise ValueError(f"nm must be in range [0, 1000], got {nm}")

    # A non-positive spread is not an error here: it yields the midpoint
    # score and also avoids dividing by zero below.
    if sb <= 0:
        return 500

    standardized = (nm - rataan) / sb
    rescaled = 500 + 100 * standardized

    # Round to an integer, then bound to the reporting scale.
    rounded = round(rescaled)
    upper_bounded = min(1000, rounded)
    return max(0, upper_bounded)
|
||
|
||
|
||
def categorize_difficulty(p_value: float) -> str:
    """
    Classify an item's difficulty from its CTT p-value.

    Thresholds (PRD Section 13.2):
    - p > 0.70          -> "mudah"  (easy)
    - 0.30 <= p <= 0.70 -> "sedang" (medium)
    - p < 0.30          -> "sulit"  (difficult)

    Args:
        p_value: CTT p-value, expected in [0.0, 1.0]. The value is not
            validated here.

    Returns:
        One of "mudah", "sedang" or "sulit".
    """
    # The thresholds are tested from easiest to hardest; anything that
    # passes neither test is reported as difficult.
    if p_value > 0.70:
        return "mudah"
    if p_value >= 0.30:
        return "sedang"
    return "sulit"
|
||
|
||
|
||
async def calculate_ctt_p_for_item(
    db: AsyncSession, item_id: int
) -> Optional[float]:
    """
    Calculate the CTT p-value of one item from its stored responses.

    Runs a single aggregate query over the UserAnswer rows of the item and
    feeds the counts into calculate_ctt_p:
        p = number of correct answers / number of answers

    Args:
        db: Async database session used to run the query.
        item_id: ID of the item whose p-value is wanted.

    Returns:
        The p-value in [0.0, 1.0], or None when the item has no responses.
    """
    # The boolean flag must be cast with a real SQLAlchemy type so that it
    # can be summed. `func.INTEGER` is a SQL function generator, not a
    # type, and cannot be used as the target type of a CAST.
    correct_as_int = cast(UserAnswer.is_correct, Integer)

    stmt = select(
        func.count().label("total"),
        func.sum(correct_as_int).label("correct"),
    ).where(UserAnswer.item_id == item_id)

    row = (await db.execute(stmt)).first()

    if row is None or row.total == 0:
        return None

    # SUM yields NULL when every is_correct value is NULL, and some drivers
    # return SUM as a non-int numeric type; normalise to a plain int.
    correct_count = int(row.correct or 0)
    return calculate_ctt_p(correct_count, row.total)
|
||
|
||
|
||
async def update_tryout_stats(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    nm: int,
) -> TryoutStats:
    """
    Fold one new NM score into the running statistics of a tryout.

    Looks up the TryoutStats row for (website_id, tryout_id). If there is
    none, a row describing a single participant is created. Otherwise the
    existing row is updated in place:

    - participant_count += 1
    - total_nm_sum += nm
    - total_nm_sq_sum += nm * nm
    - min_nm / max_nm are widened when nm falls outside them
    - rataan (mean) and sb (standard deviation) are recomputed

    The standard deviation is the population standard deviation derived
    from the running sums: variance = (sum of squares / n) - mean ** 2.

    The session is flushed before returning; it is not committed here.

    Args:
        db: Async database session.
        website_id: Website identifier.
        tryout_id: Tryout identifier.
        nm: The new NM score to include.

    Returns:
        The created or updated TryoutStats record.
    """
    lookup = select(TryoutStats).where(
        TryoutStats.website_id == website_id,
        TryoutStats.tryout_id == tryout_id,
    )
    stats = (await db.execute(lookup)).scalar_one_or_none()

    if stats is None:
        # First score for this tryout: the mean is the score itself and
        # the spread of a single data point is zero.
        stats = TryoutStats(
            website_id=website_id,
            tryout_id=tryout_id,
            participant_count=1,
            total_nm_sum=float(nm),
            total_nm_sq_sum=float(nm * nm),
            rataan=float(nm),
            sb=0.0,
            min_nm=nm,
            max_nm=nm,
            last_calculated=datetime.now(timezone.utc),
        )
        db.add(stats)
        await db.flush()
        return stats

    # Existing record: extend the running totals.
    stats.participant_count += 1
    stats.total_nm_sum += nm
    stats.total_nm_sq_sum += nm * nm

    # Widen the observed range when the new score lies outside it.
    if stats.min_nm is None or nm < stats.min_nm:
        stats.min_nm = nm
    if stats.max_nm is None or nm > stats.max_nm:
        stats.max_nm = nm

    count = stats.participant_count
    running_sum = stats.total_nm_sum
    running_sq_sum = stats.total_nm_sq_sum

    # Mean = sum of NM / n
    stats.rataan = running_sum / count

    if count > 1:
        # Population variance = E[NM^2] - (E[NM])^2. Rounding error can
        # push the difference slightly below zero, so it is floored at 0
        # before taking the square root.
        spread_sq = (running_sq_sum / count) - (stats.rataan ** 2)
        spread_sq = max(0.0, spread_sq)
        stats.sb = math.sqrt(spread_sq)
    else:
        stats.sb = 0.0

    stats.last_calculated = datetime.now(timezone.utc)

    await db.flush()
    return stats
|
||
|
||
|
||
async def get_total_bobot_max(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    level: str = "sedang",
) -> float:
    """
    Return the maximum attainable total weight (bobot) of a tryout.

    Sums Item.ctt_bobot over every item that matches the given website,
    tryout and difficulty level.

    Args:
        db: Async database session.
        website_id: Website identifier.
        tryout_id: Tryout identifier.
        level: Difficulty level the items are filtered by
            (default: "sedang").

    Returns:
        The sum of the matching items' bobot values as a float.

    Raises:
        ValueError: If the sum is missing or zero, i.e. no matching item
            contributes any bobot.
    """
    stmt = select(func.sum(Item.ctt_bobot)).where(
        Item.website_id == website_id,
        Item.tryout_id == tryout_id,
        Item.level == level,
    )
    bobot_sum = (await db.execute(stmt)).scalar()

    # The aggregate is NULL (None) when no row contributes a value; a zero
    # total is rejected as well.
    if bobot_sum is None or bobot_sum == 0:
        raise ValueError(
            f"No items with bobot found for tryout {tryout_id}, level {level}"
        )

    return float(bobot_sum)
|
||
|
||
|
||
def convert_ctt_p_to_irt_b(p_value: float) -> float:
    """
    Convert a CTT p-value into an initial IRT difficulty estimate (b).

    Formula: b = ln((1 - p) / p), bounded to [-3, +3].

    A larger b means a harder item, so the mapping decreases with p:
    an item few participants answer correctly (small p) gets a positive b,
    an item most participants answer correctly (large p) gets a negative b,
    and p = 0.5 maps to b = 0.

    The result is only a starting value for IRT calibration.

    Args:
        p_value: CTT p-value, expected in [0.0, 1.0].

    Returns:
        The b estimate in [-3.0, +3.0]. Values at or beyond the boundaries
        are not an error: p <= 0 yields +3.0 (very difficult) and p >= 1
        yields -3.0 (very easy).
    """
    # The log-odds are undefined at the boundaries, so those inputs map
    # straight to the ends of the b range.
    if p_value <= 0.0:
        return 3.0  # Nobody answered correctly: very difficult
    if p_value >= 1.0:
        return -3.0  # Everybody answered correctly: very easy

    # Odds of an incorrect answer. Their logarithm is positive for hard
    # items (p < 0.5) and negative for easy ones, which matches the
    # direction of the boundary values above.
    odds_incorrect = (1.0 - p_value) / p_value
    b = math.log(odds_incorrect)

    # Bound to the conventional IRT range [-3, +3].
    return max(-3.0, min(3.0, b))
|
||
|
||
|
||
def map_theta_to_nn(theta: float) -> int:
    """
    Project an IRT ability estimate (theta) onto the NN score scale.

    Formula: NN = 500 + (theta / 3) * 500, so theta in [-3, +3] maps
    linearly onto NN in [0, 1000] with theta = 0 at 500. This makes IRT
    results comparable with CTT NN scores.

    Args:
        theta: IRT ability estimate. Values outside [-3.0, +3.0] are
            pulled back to the nearest end of that range first.

    Returns:
        NN score as an integer in [0, 1000].
    """
    # Restrict theta to the supported ability range.
    upper_bounded_theta = min(3.0, theta)
    bounded_theta = max(-3.0, upper_bounded_theta)

    # Linear rescale: -3 -> 0, 0 -> 500, +3 -> 1000.
    scaled = 500 + (bounded_theta / 3) * 500

    # Round to an integer and bound once more to the score scale.
    upper_bounded_score = min(1000, round(scaled))
    return max(0, upper_bounded_score)
|