first commit

This commit is contained in:
Dwindi Ramadhana
2026-03-21 23:32:59 +07:00
commit cf193d7ea0
57 changed files with 17871 additions and 0 deletions

155
app/services/__init__.py Normal file
View File

@@ -0,0 +1,155 @@
"""
Services module for IRT Bank Soal.
Contains business logic services for:
- IRT calibration
- CAT selection
- WordPress authentication
- AI question generation
- Reporting
"""
from app.services.irt_calibration import (
IRTCalibrationError,
calculate_fisher_information,
calculate_item_information,
calculate_probability,
calculate_theta_se,
estimate_b_from_ctt_p,
estimate_theta_mle,
get_session_responses,
nn_to_theta,
theta_to_nn,
update_session_theta,
update_theta_after_response,
)
from app.services.cat_selection import (
CATSelectionError,
NextItemResult,
TerminationCheck,
check_user_level_reuse,
get_available_levels_for_slot,
get_next_item,
get_next_item_adaptive,
get_next_item_fixed,
get_next_item_hybrid,
should_terminate,
simulate_cat_selection,
update_theta,
)
from app.services.wordpress_auth import (
WordPressAPIError,
WordPressAuthError,
WordPressRateLimitError,
WordPressTokenInvalidError,
WordPressUserInfo,
WebsiteNotFoundError,
SyncStats,
fetch_wordpress_users,
get_or_create_user,
get_wordpress_user,
sync_wordpress_users,
verify_website_exists,
verify_wordpress_token,
)
from app.services.ai_generation import (
call_openrouter_api,
check_cache_reuse,
generate_question,
generate_with_cache_check,
get_ai_stats,
get_prompt_template,
parse_ai_response,
save_ai_question,
validate_ai_model,
SUPPORTED_MODELS,
)
from app.services.reporting import (
generate_student_performance_report,
generate_item_analysis_report,
generate_calibration_status_report,
generate_tryout_comparison_report,
export_report_to_csv,
export_report_to_excel,
export_report_to_pdf,
schedule_report,
get_scheduled_report,
list_scheduled_reports,
cancel_scheduled_report,
StudentPerformanceReport,
ItemAnalysisReport,
CalibrationStatusReport,
TryoutComparisonReport,
ReportSchedule,
)
# Explicit public API of the services package; keep this list in sync with
# the re-export imports above so `from app.services import *` stays accurate.
__all__ = [
    # IRT Calibration
    "IRTCalibrationError",
    "calculate_fisher_information",
    "calculate_item_information",
    "calculate_probability",
    "calculate_theta_se",
    "estimate_b_from_ctt_p",
    "estimate_theta_mle",
    "get_session_responses",
    "nn_to_theta",
    "theta_to_nn",
    "update_session_theta",
    "update_theta_after_response",
    # CAT Selection
    "CATSelectionError",
    "NextItemResult",
    "TerminationCheck",
    "check_user_level_reuse",
    "get_available_levels_for_slot",
    "get_next_item",
    "get_next_item_adaptive",
    "get_next_item_fixed",
    "get_next_item_hybrid",
    "should_terminate",
    "simulate_cat_selection",
    "update_theta",
    # WordPress Auth
    "WordPressAPIError",
    "WordPressAuthError",
    "WordPressRateLimitError",
    "WordPressTokenInvalidError",
    "WordPressUserInfo",
    "WebsiteNotFoundError",
    "SyncStats",
    "fetch_wordpress_users",
    "get_or_create_user",
    "get_wordpress_user",
    "sync_wordpress_users",
    "verify_website_exists",
    "verify_wordpress_token",
    # AI Generation
    "call_openrouter_api",
    "check_cache_reuse",
    "generate_question",
    "generate_with_cache_check",
    "get_ai_stats",
    "get_prompt_template",
    "parse_ai_response",
    "save_ai_question",
    "validate_ai_model",
    "SUPPORTED_MODELS",
    # Reporting
    "generate_student_performance_report",
    "generate_item_analysis_report",
    "generate_calibration_status_report",
    "generate_tryout_comparison_report",
    "export_report_to_csv",
    "export_report_to_excel",
    "export_report_to_pdf",
    "schedule_report",
    "get_scheduled_report",
    "list_scheduled_reports",
    "cancel_scheduled_report",
    "StudentPerformanceReport",
    "ItemAnalysisReport",
    "CalibrationStatusReport",
    "TryoutComparisonReport",
    "ReportSchedule",
]

View File

@@ -0,0 +1,595 @@
"""
AI Question Generation Service.
Handles OpenRouter API integration for generating question variants.
Implements caching, user-level reuse checking, and prompt engineering.
"""
import asyncio
import json
import logging
import re
from typing import Any, Dict, Literal, Optional, Union

import httpx
from sqlalchemy import and_, func, select
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.config import get_settings
from app.models.item import Item
from app.models.tryout import Tryout
from app.models.user_answer import UserAnswer
from app.schemas.ai import GeneratedQuestion
# Module-level logger and cached application settings.
logger = logging.getLogger(__name__)
settings = get_settings()
# OpenRouter API configuration
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
# Supported AI models: OpenRouter model id -> human-readable display name.
SUPPORTED_MODELS = {
    "qwen/qwen-2.5-coder-32b-instruct": "Qwen 2.5 Coder 32B",
    "meta-llama/llama-3.3-70b-instruct": "Llama 3.3 70B",
}
# Level mapping for prompts: Indonesian difficulty name -> English phrase
# interpolated into the generation prompt.
LEVEL_DESCRIPTIONS = {
    "mudah": "easier (simpler concepts, more straightforward calculations)",
    "sedang": "medium difficulty",
    "sulit": "harder (more complex concepts, multi-step reasoning)",
}
def get_prompt_template(
    basis_stem: str,
    basis_options: Dict[str, str],
    basis_correct: str,
    basis_explanation: Optional[str],
    target_level: Literal["mudah", "sulit"],
) -> str:
    """
    Generate standardized prompt for AI question generation.

    The prompt asks the model for exactly one question at ``target_level``
    difficulty and for a bare JSON reply matching the structure that
    parse_ai_response expects.

    Args:
        basis_stem: The basis question stem
        basis_options: The basis question options
        basis_correct: The basis correct answer
        basis_explanation: The basis explanation
        target_level: Target difficulty level

    Returns:
        Formatted prompt string
    """
    # Fall back to the raw level name if no description entry exists.
    level_desc = LEVEL_DESCRIPTIONS.get(target_level, target_level)
    # One " X: text" line per option, in the dict's insertion order.
    options_text = "\n".join(
        [f" {key}: {value}" for key, value in basis_options.items()]
    )
    explanation_text = (
        f"Explanation: {basis_explanation}"
        if basis_explanation
        else "Explanation: (not provided)"
    )
    # NOTE: the double braces in OUTPUT FORMAT render as literal JSON braces.
    prompt = f"""You are an educational content creator specializing in creating assessment questions.
Given a "Sedang" (medium difficulty) question, generate a new question at a different difficulty level.
BASIS QUESTION (Sedang level):
Question: {basis_stem}
Options:
{options_text}
Correct Answer: {basis_correct}
{explanation_text}
TASK:
Generate 1 new question that is {level_desc} than the basis question above.
REQUIREMENTS:
1. Keep the SAME topic/subject matter as the basis question
2. Use similar context and terminology
3. Create exactly 4 answer options (A, B, C, D)
4. Only ONE correct answer
5. Include a clear explanation of why the correct answer is correct
6. Make the question noticeably {level_desc} - not just a minor variation
OUTPUT FORMAT:
Return ONLY a valid JSON object with this exact structure (no markdown, no code blocks):
{{"stem": "Your question text here", "options": {{"A": "Option A text", "B": "Option B text", "C": "Option C text", "D": "Option D text"}}, "correct": "A", "explanation": "Explanation text here"}}
Remember: The correct field must be exactly "A", "B", "C", or "D"."""
    return prompt
def parse_ai_response(response_text: str) -> Optional[GeneratedQuestion]:
    """
    Extract a GeneratedQuestion from raw AI output.

    Tolerates several response shapes: fenced ```json blocks, generic
    fenced blocks, a bare ``{...}`` object embedded in text, or a response
    that is already plain JSON.

    Args:
        response_text: Raw AI response text

    Returns:
        GeneratedQuestion if parsing successful, None otherwise
    """
    if not response_text:
        return None
    cleaned = response_text.strip()
    # Extraction strategies, most specific first.
    json_patterns = [
        r"```json\s*([\s\S]*?)\s*```",  # ```json ... ```
        r"```\s*([\s\S]*?)\s*```",  # ``` ... ```
        r"(\{[\s\S]*\})",  # Raw JSON object
    ]
    for pattern in json_patterns:
        found = re.search(pattern, cleaned)
        if not found:
            continue
        try:
            payload = json.loads(found.group(1).strip())
        except json.JSONDecodeError:
            continue
        return validate_and_create_question(payload)
    # Last resort: the whole response might already be bare JSON.
    try:
        payload = json.loads(cleaned)
    except json.JSONDecodeError:
        logger.warning(f"Failed to parse AI response: {cleaned[:200]}...")
        return None
    return validate_and_create_question(payload)
def validate_and_create_question(data: Dict[str, Any]) -> Optional[GeneratedQuestion]:
    """
    Validate parsed JSON payload and build a GeneratedQuestion from it.

    Rejects (with a warning logged) payloads missing required fields,
    payloads whose options are malformed or incomplete, and correct
    answers outside A-D.

    Args:
        data: Parsed JSON data

    Returns:
        GeneratedQuestion if valid, None otherwise
    """
    for required in ("stem", "options", "correct"):
        if required not in data:
            logger.warning(f"Missing required fields in AI response: {data.keys()}")
            return None
    options = data.get("options", {})
    if not isinstance(options, dict):
        logger.warning("Options is not a dictionary")
        return None
    required_options = {"A", "B", "C", "D"}
    if required_options - set(options.keys()):
        logger.warning(f"Missing required options: {required_options - set(options.keys())}")
        return None
    # Normalize the answer key to upper case before checking it.
    correct = str(data.get("correct", "")).upper()
    if correct not in required_options:
        logger.warning(f"Invalid correct answer: {correct}")
        return None
    explanation = str(data.get("explanation", "")).strip() or None
    return GeneratedQuestion(
        stem=str(data["stem"]).strip(),
        options={key: str(text).strip() for key, text in options.items()},
        correct=correct,
        explanation=explanation,
    )
async def call_openrouter_api(
    prompt: str,
    model: str,
    max_retries: int = 3,
) -> Optional[str]:
    """
    Call OpenRouter API to generate question text.

    Retries on HTTP 429 (with exponential backoff: 1s, 2s, 4s, ...),
    timeouts, and transport errors; other HTTP errors fail immediately.

    Fixes over the previous version: the ``import asyncio`` inside the retry
    loop is hoisted to module level, and a single AsyncClient is reused
    across attempts instead of reconnecting per retry.

    Args:
        prompt: The prompt to send
        model: AI model to use
        max_retries: Maximum retry attempts

    Returns:
        API response text or None if failed
    """
    if not settings.OPENROUTER_API_KEY:
        logger.error("OPENROUTER_API_KEY not configured")
        return None
    if model not in SUPPORTED_MODELS:
        logger.error(f"Unsupported AI model: {model}")
        return None
    headers = {
        "Authorization": f"Bearer {settings.OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        # OpenRouter attribution headers.
        "HTTP-Referer": "https://github.com/irt-bank-soal",
        "X-Title": "IRT Bank Soal",
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": prompt,
            }
        ],
        "max_tokens": 2000,
        "temperature": 0.7,
    }
    timeout = httpx.Timeout(settings.OPENROUTER_TIMEOUT)
    # Reuse one client (and its connection pool) across all attempts.
    async with httpx.AsyncClient(timeout=timeout) as client:
        for attempt in range(max_retries):
            try:
                response = await client.post(
                    OPENROUTER_API_URL,
                    headers=headers,
                    json=payload,
                )
            except httpx.TimeoutException:
                logger.warning(f"OpenRouter timeout, attempt {attempt + 1}/{max_retries}")
                if attempt < max_retries - 1:
                    continue
                return None
            except Exception as e:
                logger.error(f"OpenRouter API call failed: {e}")
                if attempt < max_retries - 1:
                    continue
                return None
            if response.status_code == 200:
                choices = response.json().get("choices", [])
                if choices:
                    return choices[0].get("message", {}).get("content")
                logger.warning("No choices in OpenRouter response")
                return None
            if response.status_code == 429:
                # Rate limited - wait and retry with exponential backoff.
                logger.warning(f"Rate limited, attempt {attempt + 1}/{max_retries}")
                if attempt < max_retries - 1:
                    await asyncio.sleep(2 ** attempt)
                    continue
                return None
            # Any other HTTP status is treated as a hard failure.
            logger.error(
                f"OpenRouter API error: {response.status_code} - {response.text}"
            )
            return None
    return None
async def generate_question(
    basis_item: Item,
    target_level: Literal["mudah", "sulit"],
    ai_model: str = "qwen/qwen-2.5-coder-32b-instruct",
) -> Optional[GeneratedQuestion]:
    """
    Generate a new question variant from a basis item.

    Builds the prompt from the basis item's fields, calls OpenRouter, and
    parses the model's reply.

    Args:
        basis_item: The basis item (must be sedang level)
        target_level: Target difficulty level
        ai_model: AI model to use

    Returns:
        GeneratedQuestion if successful, None otherwise
    """
    prompt = get_prompt_template(
        basis_stem=basis_item.stem,
        basis_options=basis_item.options,
        basis_correct=basis_item.correct_answer,
        basis_explanation=basis_item.explanation,
        target_level=target_level,
    )
    raw_response = await call_openrouter_api(prompt, ai_model)
    if not raw_response:
        logger.error("No response from OpenRouter API")
        return None
    parsed = parse_ai_response(raw_response)
    if not parsed:
        logger.error("Failed to parse AI response")
        return None
    return parsed
async def check_cache_reuse(
    tryout_id: str,
    slot: int,
    level: str,
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
) -> Optional[Item]:
    """
    Return a cached item at (tryout, slot, level) the user has not answered.

    Looks up existing items for the slot/level and returns the first one
    with no recorded answer from this user; None when every candidate has
    already been answered (or none exist).

    Args:
        tryout_id: Tryout identifier
        slot: Question slot
        level: Difficulty level
        wp_user_id: WordPress user ID
        website_id: Website identifier
        db: Database session

    Returns:
        Cached item if found and user hasn't answered, None otherwise
    """
    candidates = (
        await db.execute(
            select(Item).where(
                and_(
                    Item.tryout_id == tryout_id,
                    Item.website_id == website_id,
                    Item.slot == slot,
                    Item.level == level,
                )
            )
        )
    ).scalars().all()
    if not candidates:
        return None
    # One lookup per candidate; slot/level pools are small in practice.
    for candidate in candidates:
        prior_answer = (
            await db.execute(
                select(UserAnswer).where(
                    and_(
                        UserAnswer.item_id == candidate.id,
                        UserAnswer.wp_user_id == wp_user_id,
                    )
                )
            )
        ).scalar_one_or_none()
        if prior_answer is None:
            logger.info(
                f"Cache hit for tryout={tryout_id}, slot={slot}, level={level}, "
                f"item_id={candidate.id}, user={wp_user_id}"
            )
            return candidate
    logger.info(
        f"Cache miss (user answered all) for tryout={tryout_id}, slot={slot}, "
        f"level={level}, user={wp_user_id}"
    )
    return None
async def generate_with_cache_check(
    tryout_id: str,
    slot: int,
    level: Literal["mudah", "sulit"],
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
    ai_model: str = "qwen/qwen-2.5-coder-32b-instruct",
) -> tuple[Optional[Union[Item, GeneratedQuestion]], bool]:
    """
    Generate question with cache checking.

    Order of operations:
    1. Look for a cached item at (tryout, slot, level) the user has not
       answered yet; cache hits are served regardless of the AI toggle.
    2. On cache miss, stop if AI generation is disabled for the tryout.
    3. Otherwise generate a new question from the "sedang" basis item at
       the same slot.

    (This removes the duplicated cache-lookup block the previous version
    had in its AI-disabled branch; results are unchanged.)

    Args:
        tryout_id: Tryout identifier
        slot: Question slot
        level: Target difficulty level
        wp_user_id: WordPress user ID
        website_id: Website identifier
        db: Database session
        ai_model: AI model to use

    Returns:
        Tuple of (item/question or None, is_cached)
    """
    # Cache is consulted first: a cached item is valid whether or not AI
    # generation is enabled for the tryout.
    cached_item = await check_cache_reuse(
        tryout_id, slot, level, wp_user_id, website_id, db
    )
    if cached_item:
        return cached_item, True
    # Cache miss - only generate when the tryout allows AI generation.
    tryout_result = await db.execute(
        select(Tryout).where(
            and_(
                Tryout.tryout_id == tryout_id,
                Tryout.website_id == website_id,
            )
        )
    )
    tryout = tryout_result.scalar_one_or_none()
    if tryout and not tryout.ai_generation_enabled:
        logger.info(f"AI generation disabled for tryout={tryout_id}")
        return None, False
    # Find the "sedang" basis item at the same slot to derive from.
    basis_result = await db.execute(
        select(Item).where(
            and_(
                Item.tryout_id == tryout_id,
                Item.website_id == website_id,
                Item.slot == slot,
                Item.level == "sedang",
            )
        ).limit(1)
    )
    basis_item = basis_result.scalar_one_or_none()
    if not basis_item:
        logger.error(
            f"No basis item found for tryout={tryout_id}, slot={slot}"
        )
        return None, False
    # Generate a brand-new question via the AI model.
    generated = await generate_question(basis_item, level, ai_model)
    if not generated:
        logger.error(
            f"Failed to generate question for tryout={tryout_id}, slot={slot}, level={level}"
        )
        return None, False
    return generated, False
async def save_ai_question(
    generated_data: GeneratedQuestion,
    tryout_id: str,
    website_id: int,
    basis_item_id: int,
    slot: int,
    level: Literal["mudah", "sedang", "sulit"],
    ai_model: str,
    db: AsyncSession,
) -> Optional[int]:
    """
    Save AI-generated question to database.

    The row is flushed (not committed) so its primary key becomes
    available; committing is presumably the caller's responsibility —
    TODO confirm against the calling endpoint.

    Args:
        generated_data: Generated question data
        tryout_id: Tryout identifier
        website_id: Website identifier
        basis_item_id: Basis item ID
        slot: Question slot
        level: Difficulty level
        ai_model: AI model used
        db: Database session

    Returns:
        Created item ID or None if failed
    """
    try:
        new_item = Item(
            tryout_id=tryout_id,
            website_id=website_id,
            slot=slot,
            level=level,
            stem=generated_data.stem,
            options=generated_data.options,
            correct_answer=generated_data.correct,
            explanation=generated_data.explanation,
            # Provenance: mark as AI-generated, record model and basis item.
            generated_by="ai",
            ai_model=ai_model,
            basis_item_id=basis_item_id,
            # New AI items start uncalibrated with empty CTT/IRT statistics.
            calibrated=False,
            ctt_p=None,
            ctt_bobot=None,
            ctt_category=None,
            irt_b=None,
            irt_se=None,
            calibration_sample_size=0,
        )
        db.add(new_item)
        await db.flush()  # Get the ID without committing
        logger.info(
            f"Saved AI-generated item: id={new_item.id}, tryout={tryout_id}, "
            f"slot={slot}, level={level}, model={ai_model}"
        )
        return new_item.id
    except Exception as e:
        # Deliberate best-effort: failures are logged and None returned so
        # callers can degrade instead of crashing mid-session.
        logger.error(f"Failed to save AI-generated question: {e}")
        return None
async def get_ai_stats(db: AsyncSession) -> Dict[str, Any]:
    """
    Aggregate statistics about AI-generated items.

    Args:
        db: Database session

    Returns:
        Statistics dictionary
    """
    ai_filter = Item.generated_by == "ai"
    # Overall count of AI-generated items.
    total_ai_items = (
        await db.execute(select(func.count(Item.id)).where(ai_filter))
    ).scalar() or 0
    # Per-model breakdown (rows are (model, count) pairs).
    per_model_rows = await db.execute(
        select(Item.ai_model, func.count(Item.id))
        .where(ai_filter)
        .where(Item.ai_model.isnot(None))
        .group_by(Item.ai_model)
    )
    items_by_model = dict(per_model_rows.all())
    # Cache hit rate is not tracked yet; the zeros below are placeholders.
    return {
        "total_ai_items": total_ai_items,
        "items_by_model": items_by_model,
        "cache_hit_rate": 0.0,
        "total_cache_hits": 0,
        "total_requests": 0,
    }
def validate_ai_model(model: str) -> bool:
    """
    Check whether an AI model identifier is in the supported set.

    Args:
        model: AI model identifier

    Returns:
        True if model is supported
    """
    return model in SUPPORTED_MODELS

View File

@@ -0,0 +1,702 @@
"""
CAT (Computerized Adaptive Testing) Selection Service.
Implements adaptive item selection algorithms for IRT-based testing.
Supports three modes: CTT (fixed), IRT (adaptive), and hybrid.
"""
import math
from dataclasses import dataclass
from datetime import datetime
from typing import Literal, Optional
from sqlalchemy import and_, not_, or_, select, func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.models import Item, Session, Tryout, UserAnswer
from app.services.irt_calibration import (
calculate_item_information,
estimate_b_from_ctt_p,
estimate_theta_mle,
update_theta_after_response,
)
class CATSelectionError(Exception):
    """Raised when CAT item selection cannot proceed.

    Examples: unknown session id, or an unrecognized selection mode.
    """
@dataclass
class NextItemResult:
    """Result of next item selection."""
    # The chosen item, or None when no suitable item remains.
    item: Optional[Item]
    selection_method: str  # 'fixed', 'adaptive', 'hybrid'
    # Slot and level of the chosen item (both None when item is None).
    slot: Optional[int]
    level: Optional[str]
    reason: str  # Why this item was selected
@dataclass
class TerminationCheck:
    """Result of termination condition check."""
    should_terminate: bool  # True when the session should stop serving items
    reason: str  # Human-readable summary of the conditions that fired
    items_answered: int  # Number of answers recorded for the session
    current_se: Optional[float]  # Latest theta standard error (None if unset)
    max_items: Optional[int]  # Configured item cap (None = unlimited)
    se_threshold_met: bool  # SE below threshold AND minimum item count reached
# Default SE threshold for termination: stop once theta's standard error
# drops below this value (in theta units).
DEFAULT_SE_THRESHOLD = 0.5
# Default max items if not configured — safety cap on test length.
DEFAULT_MAX_ITEMS = 50
async def get_next_item_fixed(
    db: AsyncSession,
    session_id: str,
    tryout_id: str,
    website_id: int,
    level_filter: Optional[str] = None
) -> NextItemResult:
    """
    Serve the next item in fixed slot order (CTT mode).

    Items are ordered by (slot, level); items the session already answered
    are excluded, as are items outside ``level_filter`` when given.

    Args:
        db: Database session
        session_id: Session identifier
        tryout_id: Tryout identifier
        website_id: Website identifier
        level_filter: Optional difficulty level filter ('mudah', 'sedang', 'sulit')

    Returns:
        NextItemResult with selected item or None if no more items

    Raises:
        CATSelectionError: If the session does not exist.
    """
    # The session must exist before items can be served for it.
    session = (
        await db.execute(select(Session).where(Session.session_id == session_id))
    ).scalar_one_or_none()
    if session is None:
        raise CATSelectionError(f"Session {session_id} not found")
    # Items this session has already answered are never re-served.
    answered_rows = await db.execute(
        select(UserAnswer.item_id).where(UserAnswer.session_id == session_id)
    )
    seen_ids = [row[0] for row in answered_rows.all()]
    stmt = (
        select(Item)
        .where(
            Item.tryout_id == tryout_id,
            Item.website_id == website_id
        )
        .order_by(Item.slot, Item.level)
    )
    if level_filter:
        stmt = stmt.where(Item.level == level_filter)
    if seen_ids:
        stmt = stmt.where(not_(Item.id.in_(seen_ids)))
    candidates = (await db.execute(stmt)).scalars().all()
    if not candidates:
        return NextItemResult(
            item=None,
            selection_method="fixed",
            slot=None,
            level=None,
            reason="No more items available"
        )
    # Lowest-slot item comes first thanks to the ORDER BY.
    chosen = candidates[0]
    return NextItemResult(
        item=chosen,
        selection_method="fixed",
        slot=chosen.slot,
        level=chosen.level,
        reason=f"Fixed order selection - slot {chosen.slot}"
    )
async def get_next_item_adaptive(
    db: AsyncSession,
    session_id: str,
    tryout_id: str,
    website_id: int,
    ai_generation_enabled: bool = False,
    level_filter: Optional[str] = None
) -> NextItemResult:
    """
    Select the next item adaptively (IRT mode).

    Considers only calibrated items and picks the one whose difficulty b is
    closest to the session's current theta, with a small bonus for higher
    item information at that theta. AI-generated items are eligible only
    when AI generation is enabled.

    Args:
        db: Database session
        session_id: Session identifier
        tryout_id: Tryout identifier
        website_id: Website identifier
        ai_generation_enabled: Whether to include AI-generated items
        level_filter: Optional difficulty level filter

    Returns:
        NextItemResult with selected item or None if no suitable items

    Raises:
        CATSelectionError: If the session does not exist.
    """
    session = (
        await db.execute(select(Session).where(Session.session_id == session_id))
    ).scalar_one_or_none()
    if session is None:
        raise CATSelectionError(f"Session {session_id} not found")
    # First item: no responses yet, start from the average-ability prior.
    current_theta = 0.0 if session.theta is None else session.theta
    answered_rows = await db.execute(
        select(UserAnswer.item_id).where(UserAnswer.session_id == session_id)
    )
    seen_ids = [row[0] for row in answered_rows.all()]
    stmt = select(Item).where(
        Item.tryout_id == tryout_id,
        Item.website_id == website_id,
        Item.calibrated == True  # noqa: E712 - SQL boolean expression
    )
    if level_filter:
        stmt = stmt.where(Item.level == level_filter)
    if seen_ids:
        stmt = stmt.where(not_(Item.id.in_(seen_ids)))
    if not ai_generation_enabled:
        stmt = stmt.where(Item.generated_by == 'manual')
    candidates = (await db.execute(stmt)).scalars().all()
    if not candidates:
        return NextItemResult(
            item=None,
            selection_method="adaptive",
            slot=None,
            level=None,
            reason="No calibrated items available"
        )
    # Score = |b - theta| - 0.1 * information(theta, b); lower is better.
    # min() resolves ties to the earliest candidate, matching a linear scan
    # that only replaces on a strictly lower score.
    scored = [
        (
            abs(candidate.irt_b - current_theta)
            - (0.1 * calculate_item_information(current_theta, candidate.irt_b)),
            candidate,
        )
        for candidate in candidates
        if candidate.irt_b is not None  # guard: b should exist when calibrated
    ]
    if not scored:
        return NextItemResult(
            item=None,
            selection_method="adaptive",
            slot=None,
            level=None,
            reason="No items with valid IRT parameters available"
        )
    _, best_item = min(scored, key=lambda pair: pair[0])
    return NextItemResult(
        item=best_item,
        selection_method="adaptive",
        slot=best_item.slot,
        level=best_item.level,
        reason=f"Adaptive selection - b={best_item.irt_b:.3f} ≈ θ={current_theta:.3f}"
    )
async def get_next_item_hybrid(
    db: AsyncSession,
    session_id: str,
    tryout_id: str,
    website_id: int,
    hybrid_transition_slot: int = 10,
    ai_generation_enabled: bool = False,
    level_filter: Optional[str] = None
) -> NextItemResult:
    """
    Select the next item in hybrid mode.

    Serves the first ``hybrid_transition_slot`` items in fixed order, then
    switches to adaptive selection; if no calibrated items remain, falls
    back to fixed (CTT) order.

    Args:
        db: Database session
        session_id: Session identifier
        tryout_id: Tryout identifier
        website_id: Website identifier
        hybrid_transition_slot: Slot number to transition from fixed to adaptive
        ai_generation_enabled: Whether to include AI-generated items
        level_filter: Optional difficulty level filter

    Returns:
        NextItemResult with selected item or None if no items available

    Raises:
        CATSelectionError: If the session does not exist.
    """
    session = (
        await db.execute(select(Session).where(Session.session_id == session_id))
    ).scalar_one_or_none()
    if session is None:
        raise CATSelectionError(f"Session {session_id} not found")
    # Position in the test = number of answers recorded so far.
    answered_count = (
        await db.execute(
            select(func.count(UserAnswer.id)).where(
                UserAnswer.session_id == session_id
            )
        )
    ).scalar() or 0
    current_slot = answered_count + 1
    # Phase 1: fixed order until the transition slot is passed.
    if current_slot <= hybrid_transition_slot:
        fixed_phase = await get_next_item_fixed(
            db, session_id, tryout_id, website_id, level_filter
        )
        fixed_phase.selection_method = "hybrid_fixed"
        fixed_phase.reason = f"Hybrid mode (fixed phase) - slot {current_slot}"
        return fixed_phase
    # Phase 2: adaptive selection.
    adaptive_result = await get_next_item_adaptive(
        db, session_id, tryout_id, website_id, ai_generation_enabled, level_filter
    )
    if adaptive_result.item is not None:
        adaptive_result.selection_method = "hybrid_adaptive"
        adaptive_result.reason = f"Hybrid mode (adaptive phase) - {adaptive_result.reason}"
        return adaptive_result
    # Fallback: no calibrated items left, revert to fixed order.
    fallback = await get_next_item_fixed(
        db, session_id, tryout_id, website_id, level_filter
    )
    fallback.selection_method = "hybrid_fallback"
    fallback.reason = f"Hybrid mode (CTT fallback) - {fallback.reason}"
    return fallback
async def update_theta(
    db: AsyncSession,
    session_id: str,
    item_id: int,
    is_correct: bool
) -> tuple[float, float]:
    """
    Update session theta estimate based on response.

    Calls estimate_theta from irt_calibration.py.
    Updates session.theta and session.theta_se.
    Handles initial theta (uses 0.0 for first item).
    Clamps theta to [-3, +3].

    Args:
        db: Database session
        session_id: Session identifier
        item_id: Item that was answered
        is_correct: Whether the answer was correct

    Returns:
        Tuple of (theta, theta_se)
    """
    # Thin wrapper: all estimation logic lives in
    # irt_calibration.update_theta_after_response; kept here so CAT callers
    # have a single service-level entry point.
    return await update_theta_after_response(db, session_id, item_id, is_correct)
async def should_terminate(
    db: AsyncSession,
    session_id: str,
    max_items: Optional[int] = None,
    se_threshold: float = DEFAULT_SE_THRESHOLD
) -> TerminationCheck:
    """
    Decide whether an adaptive session should stop serving items.

    Termination fires when either:
    - the session has reached ``max_items`` answered items, or
    - theta's standard error has dropped below ``se_threshold`` AND at
      least 15 items have been answered (minimum per PRD).

    Args:
        db: Database session
        session_id: Session identifier
        max_items: Maximum items allowed (None = no limit)
        se_threshold: SE threshold for termination

    Returns:
        TerminationCheck with termination status and reason

    Raises:
        CATSelectionError: If the session does not exist.
    """
    session = (
        await db.execute(select(Session).where(Session.session_id == session_id))
    ).scalar_one_or_none()
    if session is None:
        raise CATSelectionError(f"Session {session_id} not found")
    items_answered = (
        await db.execute(
            select(func.count(UserAnswer.id)).where(
                UserAnswer.session_id == session_id
            )
        )
    ).scalar() or 0
    max_items_reached = max_items is not None and items_answered >= max_items
    # SE-based stopping requires a minimum of 15 responses (per PRD) so an
    # early lucky streak cannot end the test prematurely.
    min_items_for_se = 15
    current_se = session.theta_se
    se_threshold_met = (
        current_se is not None
        and current_se < se_threshold
        and items_answered >= min_items_for_se
    )
    reasons = []
    if max_items_reached:
        reasons.append(f"max items reached ({items_answered}/{max_items})")
    if se_threshold_met:
        reasons.append(f"SE threshold met ({current_se:.3f} < {se_threshold})")
    if not reasons:
        reasons.append("continuing")
    return TerminationCheck(
        should_terminate=max_items_reached or se_threshold_met,
        reason="; ".join(reasons),
        items_answered=items_answered,
        current_se=current_se,
        max_items=max_items,
        se_threshold_met=se_threshold_met
    )
async def get_next_item(
    db: AsyncSession,
    session_id: str,
    selection_mode: Literal["fixed", "adaptive", "hybrid"] = "fixed",
    hybrid_transition_slot: int = 10,
    ai_generation_enabled: bool = False,
    level_filter: Optional[str] = None
) -> NextItemResult:
    """
    Dispatch item selection to the strategy for ``selection_mode``.

    Main entry point for item selection.

    Args:
        db: Database session
        session_id: Session identifier
        selection_mode: Selection mode ('fixed', 'adaptive', 'hybrid')
        hybrid_transition_slot: Slot to transition in hybrid mode
        ai_generation_enabled: Whether AI generation is enabled
        level_filter: Optional difficulty level filter

    Returns:
        NextItemResult with selected item

    Raises:
        CATSelectionError: If the session is missing or the mode is unknown.
    """
    # The session carries the tryout/website the strategies need.
    session = (
        await db.execute(select(Session).where(Session.session_id == session_id))
    ).scalar_one_or_none()
    if session is None:
        raise CATSelectionError(f"Session {session_id} not found")
    tryout_id = session.tryout_id
    website_id = session.website_id
    if selection_mode == "fixed":
        return await get_next_item_fixed(
            db, session_id, tryout_id, website_id, level_filter
        )
    if selection_mode == "adaptive":
        return await get_next_item_adaptive(
            db, session_id, tryout_id, website_id, ai_generation_enabled, level_filter
        )
    if selection_mode == "hybrid":
        return await get_next_item_hybrid(
            db, session_id, tryout_id, website_id,
            hybrid_transition_slot, ai_generation_enabled, level_filter
        )
    raise CATSelectionError(f"Unknown selection mode: {selection_mode}")
async def check_user_level_reuse(
    db: AsyncSession,
    wp_user_id: str,
    website_id: int,
    tryout_id: str,
    slot: int,
    level: str
) -> bool:
    """
    Check if user has already answered a question at this difficulty level.

    Per PRD FR-5.3: Check if student user_id already answered question
    at specific difficulty level.

    Args:
        db: Database session
        wp_user_id: WordPress user ID
        website_id: Website identifier
        tryout_id: Tryout identifier
        slot: Question slot
        level: Difficulty level

    Returns:
        True if user has answered at this level, False otherwise
    """
    # Count answers joined to items at the given slot/level combination.
    stmt = (
        select(func.count(UserAnswer.id))
        .join(Item, UserAnswer.item_id == Item.id)
        .where(
            UserAnswer.wp_user_id == wp_user_id,
            UserAnswer.website_id == website_id,
            UserAnswer.tryout_id == tryout_id,
            Item.slot == slot,
            Item.level == level
        )
    )
    answered_count = (await db.execute(stmt)).scalar() or 0
    return answered_count > 0
async def get_available_levels_for_slot(
    db: AsyncSession,
    tryout_id: str,
    website_id: int,
    slot: int
) -> list[str]:
    """
    List the distinct difficulty levels available for a specific slot.

    Args:
        db: Database session
        tryout_id: Tryout identifier
        website_id: Website identifier
        slot: Question slot

    Returns:
        List of available levels
    """
    stmt = (
        select(Item.level)
        .where(
            Item.tryout_id == tryout_id,
            Item.website_id == website_id,
            Item.slot == slot
        )
        .distinct()
    )
    result = await db.execute(stmt)
    # scalars() yields the first (only) column of each row.
    return list(result.scalars().all())
# Admin playground functions for testing CAT behavior
async def simulate_cat_selection(
    db: AsyncSession,
    tryout_id: str,
    website_id: int,
    initial_theta: float = 0.0,
    selection_mode: Literal["fixed", "adaptive", "hybrid"] = "adaptive",
    max_items: int = 15,
    se_threshold: float = DEFAULT_SE_THRESHOLD,
    hybrid_transition_slot: int = 10
) -> dict:
    """
    Simulate CAT selection for admin testing.

    Runs the selection loop against randomly simulated responses and
    returns the sequence of selected items with b values and theta
    progression.

    Args:
        db: Database session
        tryout_id: Tryout identifier
        website_id: Website identifier
        initial_theta: Starting theta value
        selection_mode: Selection mode to use
        max_items: Maximum items to simulate
        se_threshold: SE threshold for termination
        hybrid_transition_slot: Slot to transition in hybrid mode

    Returns:
        Dict with simulation results (or an "error" key when the tryout
        has no items)
    """
    import random  # local import: only used for the simulated responses

    # Load all items for this tryout once, ordered by slot
    items_query = (
        select(Item)
        .where(
            Item.tryout_id == tryout_id,
            Item.website_id == website_id
        )
        .order_by(Item.slot)
    )
    items_result = await db.execute(items_query)
    all_items = list(items_result.scalars().all())
    if not all_items:
        return {
            "error": "No items found for this tryout",
            "tryout_id": tryout_id,
            "website_id": website_id
        }

    selected_items = []
    current_theta = initial_theta
    current_se = 3.0  # start with high uncertainty
    used_item_ids = set()
    # Running histories (kept incrementally; previously rebuilt from
    # selected_items on every iteration)
    responses: list[int] = []
    b_params: list[float] = []

    for i in range(max_items):
        available_items = [item for item in all_items if item.id not in used_item_ids]
        if not available_items:
            break

        # --- Select next item according to mode ---
        if selection_mode == "adaptive":
            # Prefer calibrated items; fall back to anything available
            calibrated_items = [
                item for item in available_items
                if item.calibrated and item.irt_b is not None
            ]
            if not calibrated_items:
                calibrated_items = available_items
            # Pick the item whose difficulty is closest to current theta
            best_item = min(
                calibrated_items,
                key=lambda item: abs((item.irt_b or 0) - current_theta)
            )
        elif selection_mode == "fixed":
            # Fixed mode: take items in slot order
            best_item = min(available_items, key=lambda item: item.slot)
        else:  # hybrid
            if i < hybrid_transition_slot:
                best_item = min(available_items, key=lambda item: item.slot)
            else:
                calibrated_items = [
                    item for item in available_items
                    if item.calibrated and item.irt_b is not None
                ]
                if calibrated_items:
                    best_item = min(
                        calibrated_items,
                        key=lambda item: abs((item.irt_b or 0) - current_theta)
                    )
                else:
                    best_item = min(available_items, key=lambda item: item.slot)
        used_item_ids.add(best_item.id)

        # --- Difficulty parameter for the chosen item ---
        # BUG FIX: the previous one-liner
        #   irt_b or estimate_b_from_ctt_p(ctt_p) if ctt_p else 0.0
        # parsed as (irt_b or estimate(...)) if ctt_p else 0.0, so a
        # calibrated irt_b was discarded whenever ctt_p was missing.
        if best_item.irt_b is not None:
            b = best_item.irt_b
        elif best_item.ctt_p is not None:
            b = estimate_b_from_ctt_p(best_item.ctt_p)
        else:
            b = 0.0

        # Simulate a response under the 1PL (Rasch) model
        p_correct = 1.0 / (1.0 + math.exp(-(current_theta - b)))
        is_correct = random.random() < p_correct

        # Update ability estimate from the full response history
        responses.append(1 if is_correct else 0)
        b_params.append(b)
        current_theta, current_se = estimate_theta_mle(responses, b_params, current_theta)

        selected_items.append({
            "slot": best_item.slot,
            "level": best_item.level,
            "b": b,
            "is_correct": is_correct,
            "theta_after": current_theta,
            "se_after": current_se,
            "calibrated": best_item.calibrated
        })

        # Terminate early once SE is low enough, but never before 15 items
        if current_se < se_threshold and i >= 14:
            break

    return {
        "tryout_id": tryout_id,
        "website_id": website_id,
        "initial_theta": initial_theta,
        "selection_mode": selection_mode,
        "total_items": len(selected_items),
        "final_theta": current_theta,
        "final_se": current_se,
        "se_threshold_met": current_se < se_threshold,
        "items": selected_items
    }

View File

@@ -0,0 +1,431 @@
"""
Configuration Management Service.
Provides functions to retrieve and update tryout configurations.
Handles configuration changes for scoring, selection, and normalization modes.
"""
import logging
from typing import Any, Dict, Literal, Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.tryout import Tryout
from app.models.tryout_stats import TryoutStats
logger = logging.getLogger(__name__)
async def get_config(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Tryout:
    """
    Fetch the configuration row for a single tryout.

    The returned Tryout model carries all configuration fields
    (scoring_mode, selection_mode, normalization_mode, etc.).

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Tryout model with all configuration fields

    Raises:
        ValueError: If tryout not found
    """
    query = select(Tryout).where(
        Tryout.website_id == website_id,
        Tryout.tryout_id == tryout_id,
    )
    tryout = (await db.execute(query)).scalar_one_or_none()
    if tryout is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )
    return tryout
async def update_config(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    config_updates: Dict[str, Any],
) -> Tryout:
    """
    Update tryout configuration with provided fields.

    Accepts a dictionary of configuration updates and applies them to the
    tryout configuration. Only provided fields are updated; unknown field
    names are skipped with a warning (they do NOT raise).

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        config_updates: Dictionary of configuration fields to update

    Returns:
        Updated Tryout model (unchanged when no valid fields were given)

    Raises:
        ValueError: If tryout not found
    """
    # Fetch tryout
    result = await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    tryout = result.scalar_one_or_none()
    if tryout is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )

    # Whitelist of mutable configuration fields
    valid_fields = {
        "name", "description",
        "scoring_mode", "selection_mode", "normalization_mode",
        "min_sample_for_dynamic", "static_rataan", "static_sb",
        "ai_generation_enabled",
        "hybrid_transition_slot",
        "min_calibration_sample", "theta_estimation_method", "fallback_to_ctt_on_error",
    }

    # Apply only whitelisted fields; warn (don't fail) on unknown names
    updated_fields = []
    for field, value in config_updates.items():
        if field not in valid_fields:
            logger.warning(f"Skipping invalid config field: {field}")
            continue
        setattr(tryout, field, value)
        updated_fields.append(field)

    # FIX: previously the "Updated config" line was logged (with an empty
    # field list) and a flush issued even when nothing changed.
    if not updated_fields:
        logger.warning(f"No valid config fields to update for tryout {tryout_id}")
        return tryout

    await db.flush()
    logger.info(
        f"Updated config for tryout {tryout_id}, website {website_id}: "
        f"{', '.join(updated_fields)}"
    )
    return tryout
async def toggle_normalization_mode(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    new_mode: Literal["static", "dynamic", "hybrid"],
) -> Tryout:
    """
    Switch a tryout's normalization mode.

    Validates the requested mode, applies it, and logs readiness
    information based on the current participant count versus the
    dynamic-normalization sample threshold.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        new_mode: New normalization mode ("static", "dynamic", "hybrid")

    Returns:
        Updated Tryout model

    Raises:
        ValueError: If tryout not found or invalid mode provided
    """
    if new_mode not in ("static", "dynamic", "hybrid"):
        raise ValueError(
            f"Invalid normalization_mode: {new_mode}. "
            "Must be 'static', 'dynamic', or 'hybrid'"
        )

    # Locate the tryout row
    tryout = (await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()
    if tryout is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )

    previous_mode = tryout.normalization_mode
    tryout.normalization_mode = new_mode

    # Participant count drives the readiness warnings below
    stats = (await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()
    participant_count = stats.participant_count if stats else 0
    min_sample = tryout.min_sample_for_dynamic
    threshold_met = participant_count >= min_sample

    # Emit mode-specific guidance
    if new_mode == "dynamic" and not threshold_met:
        logger.warning(
            f"Switching to dynamic normalization with only {participant_count} "
            f"participants (threshold: {min_sample}). "
            "Dynamic normalization may produce unreliable results."
        )
    elif new_mode == "dynamic":
        logger.info(
            f"Switching to dynamic normalization with {participant_count} "
            f"participants (threshold: {min_sample}). "
            "Ready for dynamic normalization."
        )
    elif new_mode == "hybrid" and threshold_met:
        logger.info(
            f"Switching to hybrid normalization with {participant_count} "
            f"participants (threshold: {min_sample}). "
            "Will use dynamic normalization immediately."
        )
    elif new_mode == "hybrid":
        logger.info(
            f"Switching to hybrid normalization with {participant_count} "
            f"participants (threshold: {min_sample}). "
            f"Will use static normalization until {min_sample} participants reached."
        )

    await db.flush()
    logger.info(
        f"Toggled normalization mode for tryout {tryout_id}, "
        f"website {website_id}: {previous_mode} -> {new_mode}"
    )
    return tryout
async def get_normalization_config(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Dict[str, Any]:
    """
    Summarize the normalization configuration of a tryout.

    Combines the tryout's configured mode with live participant
    statistics to report the effective mode and threshold status.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Dictionary with normalization configuration summary

    Raises:
        ValueError: If tryout not found
    """
    tryout = await get_config(db, website_id, tryout_id)

    stats = (await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()

    # Threshold status
    participant_count = stats.participant_count if stats else 0
    min_sample = tryout.min_sample_for_dynamic
    threshold_ready = participant_count >= min_sample
    participants_needed = max(0, min_sample - participant_count)

    # "hybrid" resolves to dynamic once the sample threshold is met
    current_mode = tryout.normalization_mode
    effective_mode = current_mode
    if current_mode == "hybrid":
        effective_mode = "dynamic" if threshold_ready else "static"

    summary: Dict[str, Any] = {
        "tryout_id": tryout_id,
        "normalization_mode": current_mode,
        "effective_mode": effective_mode,
        "static_rataan": tryout.static_rataan,
        "static_sb": tryout.static_sb,
        "dynamic_rataan": stats.rataan if stats else None,
        "dynamic_sb": stats.sb if stats else None,
        "participant_count": participant_count,
        "min_sample_for_dynamic": min_sample,
        "threshold_ready": threshold_ready,
        "participants_needed": participants_needed,
    }
    return summary
async def reset_normalization_stats(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> TryoutStats:
    """
    Reset TryoutStats to initial values.

    Zeroes participant_count and the running sums, clears derived
    statistics, and switches the tryout's normalization_mode to
    "static" as a safe interim setting.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Reset TryoutStats record

    Raises:
        ValueError: If tryout not found
    """
    # The tryout must exist before its stats can be reset
    tryout = (await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()
    if tryout is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )

    # Fall back to static normalization until stats accumulate again
    tryout.normalization_mode = "static"

    stats = (await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()

    if stats is None:
        # No record yet — create an empty one
        stats = TryoutStats(
            website_id=website_id,
            tryout_id=tryout_id,
            participant_count=0,
            total_nm_sum=0.0,
            total_nm_sq_sum=0.0,
            rataan=None,
            sb=None,
            min_nm=None,
            max_nm=None,
        )
        db.add(stats)
    else:
        # Zero out the existing record in place
        stats.participant_count = 0
        stats.total_nm_sum = 0.0
        stats.total_nm_sq_sum = 0.0
        stats.rataan = None
        stats.sb = None
        stats.min_nm = None
        stats.max_nm = None

    await db.flush()
    logger.info(
        f"Reset normalization stats for tryout {tryout_id}, "
        f"website {website_id}. Normalization mode switched to static."
    )
    return stats
async def get_full_config(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Dict[str, Any]:
    """
    Get the complete tryout configuration plus current statistics.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Dictionary with full configuration and stats

    Raises:
        ValueError: If tryout not found
    """
    tryout = await get_config(db, website_id, tryout_id)

    stats = (await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()

    # Stats sub-dict defaults to empty values when no record exists yet
    stats_section = {
        "participant_count": stats.participant_count if stats else 0,
        "rataan": stats.rataan if stats else None,
        "sb": stats.sb if stats else None,
        "min_nm": stats.min_nm if stats else None,
        "max_nm": stats.max_nm if stats else None,
        "last_calculated": stats.last_calculated if stats else None,
    }

    return {
        "tryout_id": tryout.tryout_id,
        "name": tryout.name,
        "description": tryout.description,
        "scoring_mode": tryout.scoring_mode,
        "selection_mode": tryout.selection_mode,
        "normalization_mode": tryout.normalization_mode,
        "min_sample_for_dynamic": tryout.min_sample_for_dynamic,
        "static_rataan": tryout.static_rataan,
        "static_sb": tryout.static_sb,
        "ai_generation_enabled": tryout.ai_generation_enabled,
        "hybrid_transition_slot": tryout.hybrid_transition_slot,
        "min_calibration_sample": tryout.min_calibration_sample,
        "theta_estimation_method": tryout.theta_estimation_method,
        "fallback_to_ctt_on_error": tryout.fallback_to_ctt_on_error,
        "stats": stats_section,
        "created_at": tryout.created_at,
        "updated_at": tryout.updated_at,
    }

385
app/services/ctt_scoring.py Normal file
View File

@@ -0,0 +1,385 @@
"""
CTT (Classical Test Theory) Scoring Engine.
Implements exact Excel formulas for:
- p-value (Tingkat Kesukaran): p = Σ Benar / Total Peserta
- Bobot (Weight): Bobot = 1 - p
- NM (Nilai Mentah): NM = (Total_Bobot_Siswa / Total_Bobot_Max) × 1000
- NN (Nilai Nasional): NN = 500 + 100 × ((NM - Rataan) / SB)
All formulas match PRD Section 13.1 exactly.
"""
import math
from datetime import datetime, timezone
from typing import Optional
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.item import Item
from app.models.tryout_stats import TryoutStats
from app.models.user_answer import UserAnswer
def calculate_ctt_p(total_correct: int, total_participants: int) -> float:
    """
    Compute the CTT p-value (Tingkat Kesukaran / difficulty index).

    Formula: p = Σ Benar / Total Peserta

    Args:
        total_correct: Number of correct answers (Σ Benar)
        total_participants: Total number of participants (Total Peserta)

    Returns:
        p-value in range [0.0, 1.0]

    Raises:
        ValueError: If total_participants is 0 or values are invalid
    """
    # Reject impossible inputs up front
    if total_participants <= 0:
        raise ValueError("total_participants must be greater than 0")
    if total_correct < 0:
        raise ValueError("total_correct cannot be negative")
    if total_correct > total_participants:
        raise ValueError("total_correct cannot exceed total_participants")
    ratio = total_correct / total_participants
    # Guard against floating-point drift outside [0, 1]
    return min(max(ratio, 0.0), 1.0)
def calculate_ctt_bobot(p_value: float) -> float:
    """
    Compute the CTT bobot (weight) from a p-value.

    Formula: Bobot = 1 - p

    Easy questions (p > 0.70) get low weight, difficult questions
    (p < 0.30) get high weight, medium questions fall in between.

    Args:
        p_value: CTT p-value in range [0.0, 1.0]

    Returns:
        bobot (weight) in range [0.0, 1.0]

    Raises:
        ValueError: If p_value is outside [0, 1] range
    """
    if p_value < 0.0 or p_value > 1.0:
        raise ValueError(f"p_value must be in range [0, 1], got {p_value}")
    # Clamp guards against floating-point drift
    return min(max(1.0 - p_value, 0.0), 1.0)
def calculate_ctt_nm(total_bobot_siswa: float, total_bobot_max: float) -> int:
    """
    Compute the CTT NM (Nilai Mentah / raw score).

    Formula: NM = (Total_Bobot_Siswa / Total_Bobot_Max) × 1000

    Matches the Excel SUMPRODUCT calculation where Total_Bobot_Siswa
    is the sum of bobot earned on correct answers and Total_Bobot_Max
    is the sum of bobot over all questions.

    Args:
        total_bobot_siswa: Total weight earned by student
        total_bobot_max: Maximum possible weight (sum of all item bobots)

    Returns:
        NM (raw score) as an integer in range [0, 1000]

    Raises:
        ValueError: If total_bobot_max is 0 or values are invalid
    """
    if total_bobot_max <= 0:
        raise ValueError("total_bobot_max must be greater than 0")
    if total_bobot_siswa < 0:
        raise ValueError("total_bobot_siswa cannot be negative")
    raw_score = (total_bobot_siswa / total_bobot_max) * 1000
    # Round to nearest integer, then clamp into the legal score range
    return min(1000, max(0, round(raw_score)))
def calculate_ctt_nn(nm: int, rataan: float, sb: float) -> int:
    """
    Compute the CTT NN (Nilai Nasional / normalized score).

    Formula: NN = 500 + 100 × ((NM - Rataan) / SB)

    Normalizes scores onto a mean=500, SD=100 scale.

    Args:
        nm: Nilai Mentah (raw score) in range [0, 1000]
        rataan: Mean of NM scores
        sb: Standard deviation of NM scores (Simpangan Baku)

    Returns:
        NN (normalized score) as an integer in range [0, 1000]

    Raises:
        ValueError: If nm is out of range
    """
    if not 0 <= nm <= 1000:
        raise ValueError(f"nm must be in range [0, 1000], got {nm}")
    # Degenerate distribution (all scores identical): return the mean score
    if sb <= 0:
        return 500
    normalized = 500 + 100 * ((nm - rataan) / sb)
    # Round to nearest integer, then clamp into the legal score range
    return min(1000, max(0, round(normalized)))
def categorize_difficulty(p_value: float) -> str:
    """
    Categorize question difficulty from its CTT p-value.

    Categories per CTT standards (PRD Section 13.2):
    - p < 0.30          → "sulit"  (difficult)
    - 0.30 ≤ p ≤ 0.70   → "sedang" (medium)
    - p > 0.70          → "mudah"  (easy)

    Args:
        p_value: CTT p-value in range [0.0, 1.0]

    Returns:
        Difficulty category: "mudah", "sedang", or "sulit"
    """
    if p_value < 0.30:
        return "sulit"
    if p_value <= 0.70:
        return "sedang"
    return "mudah"
async def calculate_ctt_p_for_item(
    db: AsyncSession, item_id: int
) -> Optional[float]:
    """
    Calculate CTT p-value for a specific item from existing responses.

    Queries UserAnswer records for the item to compute:
        p = Σ Benar / Total Peserta

    Uses two plain COUNT queries. (The previous single-query version
    used ``func.cast(UserAnswer.is_correct, type_=func.INTEGER)``, which
    is not a valid SQLAlchemy cast — ``func.INTEGER`` produces a generic
    SQL function, not a type — and failed at runtime.)

    Args:
        db: Async database session
        item_id: Item ID to calculate p-value for

    Returns:
        p-value in range [0.0, 1.0], or None if no responses exist
    """
    # Total number of recorded answers for this item
    total_result = await db.execute(
        select(func.count()).where(UserAnswer.item_id == item_id)
    )
    total = total_result.scalar() or 0
    if total == 0:
        return None

    # Number of correct answers for this item
    correct_result = await db.execute(
        select(func.count()).where(
            UserAnswer.item_id == item_id,
            UserAnswer.is_correct.is_(True),
        )
    )
    correct = correct_result.scalar() or 0
    return calculate_ctt_p(correct, total)
async def update_tryout_stats(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    nm: int,
) -> TryoutStats:
    """
    Fold one new NM score into the running TryoutStats.

    Updates:
    - participant_count += 1
    - total_nm_sum += nm
    - total_nm_sq_sum += nm²
    - min_nm / max_nm when the new score extends the range

    Mean (rataan) and population standard deviation (sb) are then
    recomputed from the running sums: SD = sqrt(Σx²/n - mean²),
    clamped at 0 to absorb floating-point error.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        nm: New NM score to add

    Returns:
        Updated TryoutStats record
    """
    stats = (await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()

    if stats is None:
        # First participant: seed a fresh record from this single score
        stats = TryoutStats(
            website_id=website_id,
            tryout_id=tryout_id,
            participant_count=1,
            total_nm_sum=float(nm),
            total_nm_sq_sum=float(nm * nm),
            rataan=float(nm),
            sb=0.0,  # SD of a single data point is 0
            min_nm=nm,
            max_nm=nm,
            last_calculated=datetime.now(timezone.utc),
        )
        db.add(stats)
    else:
        # Accumulate running sums and extend the observed range
        stats.participant_count += 1
        stats.total_nm_sum += nm
        stats.total_nm_sq_sum += nm * nm
        stats.min_nm = nm if stats.min_nm is None else min(stats.min_nm, nm)
        stats.max_nm = nm if stats.max_nm is None else max(stats.max_nm, nm)

    # Recompute derived statistics from the running sums
    count = stats.participant_count
    stats.rataan = stats.total_nm_sum / count
    if count > 1:
        # Population variance; clamp handles floating-point error
        variance = max(0.0, (stats.total_nm_sq_sum / count) - stats.rataan ** 2)
        stats.sb = math.sqrt(variance)
    else:
        stats.sb = 0.0
    stats.last_calculated = datetime.now(timezone.utc)

    await db.flush()
    return stats
async def get_total_bobot_max(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    level: str = "sedang",
) -> float:
    """
    Sum the maximum attainable bobot for a tryout at one level.

    Total_Bobot_Max = Σ bobot over all questions in the tryout
    (filtered to the given difficulty level).

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        level: Difficulty level to filter by (default: "sedang")

    Returns:
        Sum of all item bobots

    Raises:
        ValueError: If no items found or items have no bobot values
    """
    stmt = select(func.sum(Item.ctt_bobot)).where(
        Item.website_id == website_id,
        Item.tryout_id == tryout_id,
        Item.level == level,
    )
    bobot_sum = (await db.execute(stmt)).scalar()
    # None means no matching rows; 0 means bobot values were never set
    if not bobot_sum:
        raise ValueError(
            f"No items with bobot found for tryout {tryout_id}, level {level}"
        )
    return float(bobot_sum)
def convert_ctt_p_to_irt_b(p_value: float) -> float:
    """
    Convert a CTT p-value to an IRT difficulty parameter (b).

    Formula: b = ln((1 - p) / p)   (equivalently -ln(p / (1 - p)))

    Hard items (low p) map to positive b and easy items (high p) to
    negative b, matching the 1PL model used elsewhere in this codebase
    (P(correct) = sigmoid(theta - b)) and this function's own boundary
    clamps. (The previous implementation negated the log, inverting the
    difficulty scale relative to its clamps.)

    Maps p ∈ (0, 1) to b, clamped to [-3, +3].

    Args:
        p_value: CTT p-value in range (0.0, 1.0); boundary values are
            clamped rather than raising

    Returns:
        IRT b-parameter estimate in [-3.0, +3.0]
    """
    if p_value <= 0.0:
        return 3.0  # nobody answered correctly -> maximally difficult
    if p_value >= 1.0:
        return -3.0  # everybody answered correctly -> maximally easy

    b = math.log((1.0 - p_value) / p_value)
    # Clamp to the conventional IRT range [-3, +3]
    return max(-3.0, min(3.0, b))
def map_theta_to_nn(theta: float) -> int:
    """
    Map an IRT theta (ability) estimate onto the NN score scale.

    Formula: NN = 500 + (θ / 3) × 500

    Maps θ ∈ [-3, +3] linearly onto NN ∈ [0, 1000]; out-of-range
    theta values are clamped first.

    Args:
        theta: IRT ability estimate, nominally in [-3.0, +3.0]

    Returns:
        NN score as an integer in range [0, 1000]
    """
    bounded = min(3.0, max(-3.0, theta))
    raw = 500 + (bounded / 3) * 500
    return min(1000, max(0, round(raw)))

View File

@@ -0,0 +1,521 @@
"""
Excel Import/Export Service for Question Migration.
Handles import from standardized Excel format with:
- Row 2: KUNCI (answer key)
- Row 4: TK (tingkat kesukaran p-value)
- Row 5: BOBOT (weight 1-p)
- Rows 6+: Individual question data
Ensures 100% data integrity with comprehensive validation.
"""
import os
from datetime import datetime
from typing import Any, Dict, List, Optional
import openpyxl
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.item import Item
from app.services.ctt_scoring import (
convert_ctt_p_to_irt_b,
categorize_difficulty,
)
def validate_excel_structure(file_path: str) -> Dict[str, Any]:
    """
    Validate Excel file structure against the required import format.

    Checks:
    - File exists and is a .xlsx workbook
    - Sheet "CONTOH" exists
    - Required rows exist (Row 2 KUNCI, Row 4 TK, Row 5 BOBOT)
    - Question data rows (6+) and minimum column count are present
    - KUNCI row has values; TK/BOBOT rows contain numeric values

    Args:
        file_path: Path to Excel file

    Returns:
        Dict with:
        - valid: bool - Whether structure is valid
        - errors: List[str] - Validation errors if any
    """
    errors: List[str] = []

    # Check file exists
    if not os.path.exists(file_path):
        return {"valid": False, "errors": [f"File not found: {file_path}"]}
    # Check file extension
    if not file_path.lower().endswith('.xlsx'):
        return {"valid": False, "errors": ["File must be .xlsx format"]}

    try:
        wb = openpyxl.load_workbook(file_path, data_only=False)
    except Exception as e:
        return {"valid": False, "errors": [f"Failed to load Excel file: {str(e)}"]}

    # Check sheet "CONTOH" exists
    if "CONTOH" not in wb.sheetnames:
        return {
            "valid": False,
            "errors": ['Sheet "CONTOH" not found. Available sheets: ' + ", ".join(wb.sheetnames)]
        }
    ws = wb["CONTOH"]

    # Row-count checks: each missing structural row gets its own message
    if ws.max_row < 6:
        errors.append(f"Excel file must have at least 6 rows (found {ws.max_row})")
    if ws.max_row < 2:
        errors.append("Row 2 (KUNCI - answer key) is required")
    if ws.max_row < 4:
        errors.append("Row 4 (TK - p-values) is required")
    if ws.max_row < 5:
        errors.append("Row 5 (BOBOT - weights) is required")
    if ws.max_row < 6:
        errors.append("Question data rows (6+) are required")

    # Check minimum columns (slot, level, soal_text, 4 options, correct_answer)
    if ws.max_column < 8:
        errors.append(
            f"Excel file must have at least 8 columns (found {ws.max_column}). "
            "Expected: slot, level, soal_text, options_A, options_B, options_C, options_D, correct_answer"
        )

    # Check KUNCI row has at least one real answer-key value
    if ws.max_row >= 2:
        kunci_row_values = [ws.cell(2, col).value for col in range(4, ws.max_column + 1)]
        if not any(v for v in kunci_row_values if v and v != "KUNCI"):
            errors.append("Row 2 (KUNCI) must contain answer key values")

    # Numeric checks on rows 4 and 5 need computed (data_only) values.
    # PERF FIX: the workbook was previously reloaded once per row check;
    # load it a single time here.
    if ws.max_row >= 4:
        ws_data = openpyxl.load_workbook(file_path, data_only=True)["CONTOH"]

        # Check TK row has numeric p-values
        tk_row_values = [ws_data.cell(4, col).value for col in range(4, ws.max_column + 1)]
        if not any(v for v in tk_row_values if isinstance(v, (int, float))):
            errors.append("Row 4 (TK) must contain numeric p-values")

        # Check BOBOT row has numeric weight values
        if ws.max_row >= 5:
            bobot_row_values = [ws_data.cell(5, col).value for col in range(4, ws.max_column + 1)]
            if not any(v for v in bobot_row_values if isinstance(v, (int, float))):
                errors.append("Row 5 (BOBOT) must contain numeric weight values")

    return {"valid": len(errors) == 0, "errors": errors}
def parse_excel_import(
    file_path: str,
    website_id: int,
    tryout_id: str
) -> Dict[str, Any]:
    """
    Parse Excel file and extract items with full validation.

    Excel structure:
    - Sheet name: "CONTOH"
    - Row 2: KUNCI (answer key) - extract correct answers per slot
    - Row 4: TK (tingkat kesukaran p-value) - extract p-values per slot
    - Row 5: BOBOT (weight 1-p) - extract bobot per slot
    - Rows 6+: Individual question data

    Rows with problems are skipped and reported in validation_errors;
    parsing continues with the remaining rows.

    Args:
        file_path: Path to Excel file
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Dict with:
        - items: List[Dict] - Parsed items ready for database
        - validation_errors: List[str] - Any validation errors
        - items_count: int - Number of items parsed
    """
    # First validate structure; bail out before any parsing on failure
    validation = validate_excel_structure(file_path)
    if not validation["valid"]:
        return {
            "items": [],
            "validation_errors": validation["errors"],
            "items_count": 0
        }
    items: List[Dict[str, Any]] = []
    errors: List[str] = []
    try:
        # Load workbook twice: once with formulas, once with data_only
        # (data_only exposes the computed values of formula cells)
        wb = openpyxl.load_workbook(file_path, data_only=False)
        ws = wb["CONTOH"]
        wb_data = openpyxl.load_workbook(file_path, data_only=True)
        ws_data = wb_data["CONTOH"]
        # Extract answer key from Row 2 (skip the "KUNCI" label cell)
        answer_key: Dict[int, str] = {}
        for col in range(4, ws.max_column + 1):
            key_cell = ws.cell(2, col).value
            if key_cell and key_cell != "KUNCI":
                slot_num = col - 3  # Column 4 -> slot 1
                answer_key[slot_num] = str(key_cell).strip().upper()
        # Extract p-values from Row 4 (only for slots present in the key)
        p_values: Dict[int, float] = {}
        for col in range(4, ws.max_column + 1):
            slot_num = col - 3
            if slot_num in answer_key:
                p_cell = ws_data.cell(4, col).value
                if p_cell and isinstance(p_cell, (int, float)):
                    p_values[slot_num] = float(p_cell)
        # Extract bobot from Row 5 (only for slots present in the key)
        bobot_values: Dict[int, float] = {}
        for col in range(4, ws.max_column + 1):
            slot_num = col - 3
            if slot_num in answer_key:
                bobot_cell = ws_data.cell(5, col).value
                if bobot_cell and isinstance(bobot_cell, (int, float)):
                    bobot_values[slot_num] = float(bobot_cell)
        # Parse question data rows (6+)
        for row_idx in range(6, ws.max_row + 1):
            # Column mapping (based on project-brief):
            # Column 1 (A): slot (question number)
            # Column 2 (B): level (mudah/sedang/sulit)
            # Column 3 (C): soal_text (question stem)
            # Column 4 (D): options_A
            # Column 5 (E): options_B
            # Column 6 (F): options_C
            # Column 7 (G): options_D
            # Column 8 (H): correct_answer
            slot_cell = ws.cell(row_idx, 1).value
            level_cell = ws.cell(row_idx, 2).value
            soal_text_cell = ws.cell(row_idx, 3).value
            option_a = ws.cell(row_idx, 4).value
            option_b = ws.cell(row_idx, 5).value
            option_c = ws.cell(row_idx, 6).value
            option_d = ws.cell(row_idx, 7).value
            correct_cell = ws.cell(row_idx, 8).value
            # Skip fully empty rows silently (trailing blank rows are common)
            if not slot_cell and not soal_text_cell:
                continue
            # A row with content but no slot is an error
            if not slot_cell:
                errors.append(f"Row {row_idx}: Missing slot value")
                continue
            # Coerce slot to int: numeric cells directly, text cells via strip
            slot_num = int(slot_cell) if isinstance(slot_cell, (int, float)) else None
            if slot_num is None:
                try:
                    slot_num = int(str(slot_cell).strip())
                except (ValueError, AttributeError):
                    errors.append(f"Row {row_idx}: Invalid slot value: {slot_cell}")
                    continue
            # Get or infer level
            if not level_cell:
                # No explicit level: infer it from the slot's p-value
                # (defaulting to 0.5 -> "sedang" when no p-value exists)
                p_val = p_values.get(slot_num, 0.5)
                level_val = categorize_difficulty(p_val)
            else:
                level_val = str(level_cell).strip().lower()
                if level_val not in ["mudah", "sedang", "sulit"]:
                    errors.append(
                        f"Row {row_idx}: Invalid level '{level_cell}'. Must be 'mudah', 'sedang', or 'sulit'"
                    )
                    continue
            # Validate soal_text (question stem is mandatory)
            if not soal_text_cell:
                errors.append(f"Row {row_idx} (slot {slot_num}): Missing soal_text (question stem)")
                continue
            # Build options JSON from the four option columns
            options: Dict[str, str] = {}
            if option_a:
                options["A"] = str(option_a).strip()
            if option_b:
                options["B"] = str(option_b).strip()
            if option_c:
                options["C"] = str(option_c).strip()
            if option_d:
                options["D"] = str(option_d).strip()
            if len(options) < 4:
                errors.append(
                    f"Row {row_idx} (slot {slot_num}): Missing options. Expected 4 options (A, B, C, D)"
                )
                continue
            # Get correct answer: per-row cell wins, Row 2 key is the fallback
            if not correct_cell:
                correct_ans = answer_key.get(slot_num)
                if not correct_ans:
                    errors.append(
                        f"Row {row_idx} (slot {slot_num}): Missing correct_answer and no answer key found"
                    )
                    continue
            else:
                correct_ans = str(correct_cell).strip().upper()
                if correct_ans not in ["A", "B", "C", "D"]:
                    errors.append(
                        f"Row {row_idx} (slot {slot_num}): Invalid correct_answer '{correct_ans}'. Must be A, B, C, or D"
                    )
                    continue
            # Get CTT parameters (bobot defaults to 1-p when Row 5 is missing)
            p_val = p_values.get(slot_num, 0.5)
            bobot_val = bobot_values.get(slot_num, 1.0 - p_val)
            # Validate p-value range
            if p_val < 0 or p_val > 1:
                errors.append(
                    f"Slot {slot_num}: Invalid p-value {p_val}. Must be in range [0, 1]"
                )
                continue
            # Validate bobot range
            if bobot_val < 0 or bobot_val > 1:
                errors.append(
                    f"Slot {slot_num}: Invalid bobot {bobot_val}. Must be in range [0, 1]"
                )
                continue
            # Derive CTT category and an initial IRT b estimate from p
            ctt_cat = categorize_difficulty(p_val)
            irt_b = convert_ctt_p_to_irt_b(p_val)
            # Build item dict matching the Item model columns;
            # imported items start uncalibrated and manually sourced
            item = {
                "tryout_id": tryout_id,
                "website_id": website_id,
                "slot": slot_num,
                "level": level_val,
                "stem": str(soal_text_cell).strip(),
                "options": options,
                "correct_answer": correct_ans,
                "explanation": None,
                "ctt_p": p_val,
                "ctt_bobot": bobot_val,
                "ctt_category": ctt_cat,
                "irt_b": irt_b,
                "irt_se": None,
                "calibrated": False,
                "calibration_sample_size": 0,
                "generated_by": "manual",
                "ai_model": None,
                "basis_item_id": None,
            }
            items.append(item)
        return {
            "items": items,
            "validation_errors": errors,
            "items_count": len(items)
        }
    except Exception as e:
        # Any unexpected failure is reported as a single parsing error
        return {
            "items": [],
            "validation_errors": [f"Parsing error: {str(e)}"],
            "items_count": 0
        }
async def bulk_insert_items(
    items_list: List[Dict[str, Any]],
    db: AsyncSession
) -> Dict[str, Any]:
    """
    Bulk insert items with duplicate detection.

    Skips duplicates based on the natural key (tryout_id, website_id, slot).
    Both rows already present in the database AND duplicate keys within
    ``items_list`` itself are skipped — previously two in-batch items with
    the same key were both added, because neither was visible in the
    database until commit.

    Args:
        items_list: List of item dictionaries to insert
        db: Async SQLAlchemy database session

    Returns:
        Dict with:
            - inserted_count: int - Number of items inserted
            - duplicate_count: int - Number of duplicates skipped
            - errors: List[str] - Any errors during insertion
    """
    inserted_count = 0
    duplicate_count = 0
    errors: List[str] = []
    # Natural keys already queued in this batch; catches in-batch duplicates
    # that a DB lookup alone would miss (they are not yet flushed/committed).
    seen_keys: set = set()
    try:
        for item_data in items_list:
            key = (
                item_data["tryout_id"],
                item_data["website_id"],
                item_data["slot"],
            )
            if key in seen_keys:
                duplicate_count += 1
                continue
            # Check for an existing row with the same natural key.
            result = await db.execute(
                select(Item).where(
                    Item.tryout_id == item_data["tryout_id"],
                    Item.website_id == item_data["website_id"],
                    Item.slot == item_data["slot"]
                )
            )
            existing = result.scalar_one_or_none()
            if existing:
                duplicate_count += 1
                continue
            # Queue the new item.
            db.add(Item(**item_data))
            seen_keys.add(key)
            inserted_count += 1
        # Commit all inserts in one transaction.
        await db.commit()
        return {
            "inserted_count": inserted_count,
            "duplicate_count": duplicate_count,
            "errors": errors
        }
    except Exception as e:
        # Roll back so a partial batch is never persisted; inserted_count is
        # reported as 0 because nothing survived the rollback.
        await db.rollback()
        return {
            "inserted_count": 0,
            "duplicate_count": duplicate_count,
            "errors": [f"Insertion failed: {str(e)}"]
        }
async def export_questions_to_excel(
    tryout_id: str,
    website_id: int,
    db: AsyncSession,
    output_path: Optional[str] = None
) -> str:
    """
    Export questions to Excel in standardized format.

    Creates an Excel workbook with a single "CONTOH" sheet laid out as:
        - Row 1: headers ("No", "Level", "Soal", then one "Soal N" per slot)
        - Row 2: KUNCI (answer key, one cell per slot at column slot + 3)
        - Row 3: empty spacer
        - Row 4: TK (p-values per slot)
        - Row 5: BOBOT (weights per slot)
        - Rows 6+: one row per question (slot, level, stem, options A-D, answer)

    NOTE(review): rows 6+ use fixed columns 4-8 for options/answer while rows
    2/4/5 use one column per slot — confirm the importer expects this mixed
    layout.

    Args:
        tryout_id: Tryout identifier
        website_id: Website identifier
        db: Async SQLAlchemy database session
        output_path: Optional output file path. If not provided, a file is
            created in the system temp directory (portable, unlike the
            previously hardcoded /tmp which breaks on Windows).

    Returns:
        Path to exported Excel file

    Raises:
        ValueError: If no items exist for the given tryout/website pair.
    """
    import os
    import tempfile
    # Fetch all items for this tryout, ordered by slot so rows come out sorted
    result = await db.execute(
        select(Item).filter(
            Item.tryout_id == tryout_id,
            Item.website_id == website_id
        ).order_by(Item.slot)
    )
    items = result.scalars().all()
    if not items:
        raise ValueError(f"No items found for tryout_id={tryout_id}, website_id={website_id}")
    # Create workbook
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "CONTOH"
    # Determine max slot for column sizing
    max_slot = max(item.slot for item in items)
    # Row 1: Header
    ws.cell(1, 1, "No")
    ws.cell(1, 2, "Level")
    ws.cell(1, 3, "Soal")
    for slot_idx in range(max_slot):
        col = slot_idx + 4
        ws.cell(1, col, f"Soal {slot_idx + 1}")
    # Row 2: KUNCI (answer key); slot N lives in column N + 3
    ws.cell(2, 1, "")
    ws.cell(2, 2, "")
    ws.cell(2, 3, "KUNCI")
    for item in items:
        col = item.slot + 3
        ws.cell(2, col, item.correct_answer)
    # Row 3: Empty spacer
    ws.cell(3, 1, "")
    ws.cell(3, 2, "")
    ws.cell(3, 3, "")
    # Row 4: TK (p-values); defaults to 0.5 when the item has no CTT p yet
    ws.cell(4, 1, "")
    ws.cell(4, 2, "")
    ws.cell(4, 3, "TK")
    for item in items:
        col = item.slot + 3
        ws.cell(4, col, item.ctt_p or 0.5)
    # Row 5: BOBOT (weights); default weight is 1 - p
    ws.cell(5, 1, "")
    ws.cell(5, 2, "")
    ws.cell(5, 3, "BOBOT")
    for item in items:
        col = item.slot + 3
        ws.cell(5, col, item.ctt_bobot or (1.0 - (item.ctt_p or 0.5)))
    # Rows 6+: Question data, one row per item
    row_idx = 6
    for item in items:
        # Column 1: Slot number
        ws.cell(row_idx, 1, item.slot)
        # Column 2: Level
        ws.cell(row_idx, 2, item.level)
        # Column 3: Soal text (stem)
        ws.cell(row_idx, 3, item.stem)
        # Columns 4-7: Options A-D
        options = item.options or {}
        ws.cell(row_idx, 4, options.get("A", ""))
        ws.cell(row_idx, 5, options.get("B", ""))
        ws.cell(row_idx, 6, options.get("C", ""))
        ws.cell(row_idx, 7, options.get("D", ""))
        # Column 8: Correct answer
        ws.cell(row_idx, 8, item.correct_answer)
        row_idx += 1
    # Generate output path if not provided; use the OS temp dir instead of a
    # hardcoded /tmp so export also works on Windows and honors TMPDIR.
    if output_path is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = os.path.join(
            tempfile.gettempdir(),
            f"tryout_{tryout_id}_export_{timestamp}.xlsx",
        )
    # Save workbook
    wb.save(output_path)
    return output_path

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,538 @@
"""
Dynamic Normalization Service.
Implements dynamic normalization with real-time calculation of rataan and SB
for each tryout. Supports multiple normalization modes:
- Static: Use hardcoded rataan/SB from config
- Dynamic: Calculate rataan/SB from participant NM scores in real-time
- Hybrid: Use static until threshold reached, then switch to dynamic
"""
import logging
import math
from datetime import datetime, timezone
from typing import Literal, Optional, Tuple
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.tryout import Tryout
from app.models.tryout_stats import TryoutStats
logger = logging.getLogger(__name__)
async def calculate_dynamic_stats(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Tuple[Optional[float], Optional[float]]:
    """
    Read the current dynamic normalization parameters for a tryout.

    Looks up the TryoutStats row keyed by (website_id, tryout_id) and
    returns its stored mean (rataan) and standard deviation (sb).

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        (rataan, sb) from the stats row, or (None, None) when no stats
        record exists yet for this tryout.
    """
    query = select(TryoutStats).where(
        TryoutStats.website_id == website_id,
        TryoutStats.tryout_id == tryout_id,
    )
    record = (await db.execute(query)).scalar_one_or_none()
    if record is None:
        return None, None
    return record.rataan, record.sb
async def update_dynamic_normalization(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    nm: int,
) -> Tuple[float, float]:
    """
    Update dynamic normalization with a new NM (Nilai Mentah / raw score).

    Fetches the current TryoutStats row for (website_id, tryout_id) — or
    creates one on first use — and incrementally updates it:
    - Increments participant_count by 1
    - Adds NM to total_nm_sum
    - Adds NM² to total_nm_sq_sum
    - Updates min_nm / max_nm
    - Recalculates rataan (mean) and sb (population standard deviation)

    Only flushes the session; committing appears to be left to the caller —
    TODO confirm callers commit the surrounding transaction.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        nm: Nilai Mentah (raw score) to add, must be in [0, 1000]

    Returns:
        Tuple of updated (rataan, sb)

    Raises:
        ValueError: If nm is out of valid range [0, 1000]
    """
    if not 0 <= nm <= 1000:
        raise ValueError(f"nm must be in range [0, 1000], got {nm}")
    result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    stats = result.scalar_one_or_none()
    if stats is None:
        # First participant for this tryout: seed a fresh stats record with
        # this single NM value.
        stats = TryoutStats(
            website_id=website_id,
            tryout_id=tryout_id,
            participant_count=1,
            total_nm_sum=float(nm),
            total_nm_sq_sum=float(nm * nm),
            rataan=float(nm),
            sb=0.0,  # SD is 0 for single data point
            min_nm=nm,
            max_nm=nm,
            last_calculated=datetime.now(timezone.utc),
        )
        db.add(stats)
    else:
        # Incrementally update the running sums — O(1) per participant, no
        # need to re-read every session's NM.
        stats.participant_count += 1
        stats.total_nm_sum += nm
        stats.total_nm_sq_sum += nm * nm
        # Update min/max
        if stats.min_nm is None or nm < stats.min_nm:
            stats.min_nm = nm
        if stats.max_nm is None or nm > stats.max_nm:
            stats.max_nm = nm
    # Recalculate mean and SD from the running sums (covers both branches).
    n = stats.participant_count
    sum_nm = stats.total_nm_sum
    sum_nm_sq = stats.total_nm_sq_sum
    # Mean = Σ NM / n
    mean = sum_nm / n
    stats.rataan = mean
    # Variance = (Σ NM² / n) - (mean)²
    # Using population standard deviation.
    # NOTE(review): the sum-of-squares form can lose float precision for very
    # large n; NM is bounded by [0, 1000], so this looks acceptable here.
    if n > 1:
        variance = (sum_nm_sq / n) - (mean ** 2)
        # Clamp variance to non-negative (handles floating point errors)
        variance = max(0.0, variance)
        stats.sb = math.sqrt(variance)
    else:
        stats.sb = 0.0
    stats.last_calculated = datetime.now(timezone.utc)
    await db.flush()
    logger.info(
        f"Updated dynamic normalization for tryout {tryout_id}, "
        f"website {website_id}: participant_count={stats.participant_count}, "
        f"rataan={stats.rataan:.2f}, sb={stats.sb:.2f}"
    )
    # rataan and sb are always set by this function
    assert stats.rataan is not None
    assert stats.sb is not None
    return stats.rataan, stats.sb
def apply_normalization(
    nm: int,
    rataan: float,
    sb: float,
) -> int:
    """
    Convert a raw score (NM) into the normalized national score (NN).

    NN = 500 + 100 × ((NM − rataan) / sb), i.e. a z-score rescaled onto a
    mean-500 / SD-100 distribution, then rounded and clamped to [0, 1000].

    Args:
        nm: Nilai Mentah (raw score) in range [0, 1000]
        rataan: Mean of NM scores
        sb: Standard deviation of NM scores

    Returns:
        NN (normalized score) in range [0, 1000]

    Raises:
        ValueError: If nm is outside [0, 1000]
    """
    if nm < 0 or nm > 1000:
        raise ValueError(f"nm must be in range [0, 1000], got {nm}")
    # Degenerate distribution (zero/negative SD): every participant scored
    # the same, so everyone lands exactly on the target mean.
    if sb <= 0:
        return 500
    scaled = 500 + 100 * ((nm - rataan) / sb)
    # Round to the nearest integer, then clamp into the valid score range.
    return min(1000, max(0, round(scaled)))
async def get_normalization_mode(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Literal["static", "dynamic", "hybrid"]:
    """
    Look up the configured normalization mode for one tryout.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        The tryout's normalization_mode: "static", "dynamic", or "hybrid"

    Raises:
        ValueError: If no such tryout exists for the website
    """
    query = select(Tryout).where(
        Tryout.website_id == website_id,
        Tryout.tryout_id == tryout_id,
    )
    tryout_row = (await db.execute(query)).scalar_one_or_none()
    if tryout_row is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )
    return tryout_row.normalization_mode
async def check_threshold_for_dynamic(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> bool:
    """
    Decide whether enough participants exist to trust dynamic normalization.

    Compares the tryout's current participant_count against its configured
    min_sample_for_dynamic threshold.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        True when participant_count >= min_sample_for_dynamic, else False
    """
    # Current participant count (0 when no stats row exists yet).
    stats_query = select(TryoutStats).where(
        TryoutStats.website_id == website_id,
        TryoutStats.tryout_id == tryout_id,
    )
    stats_row = (await db.execute(stats_query)).scalar_one_or_none()
    participant_count = 0 if stats_row is None else stats_row.participant_count
    # Configured threshold from the tryout config.
    threshold_query = select(Tryout.min_sample_for_dynamic).where(
        Tryout.website_id == website_id,
        Tryout.tryout_id == tryout_id,
    )
    threshold = (await db.execute(threshold_query)).scalar_one_or_none()
    if threshold is None:
        # Default to 100 if not configured
        threshold = 100
    return participant_count >= threshold
async def _fetch_static_params(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Tuple[float, float]:
    """
    Fetch the configured (static_rataan, static_sb) for one tryout.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Tuple of (static_rataan, static_sb)

    Raises:
        ValueError: If the tryout does not exist
    """
    result = await db.execute(
        select(Tryout.static_rataan, Tryout.static_sb).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    # BUG FIX: the original used result.scalar_one_or_none(), which on a
    # two-column select returns only the FIRST column (a float), so the
    # subsequent `rataan, sb = row` unpacking raised TypeError at runtime.
    # one_or_none() returns the full Row (or None) as intended.
    row = result.one_or_none()
    if row is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )
    rataan, sb = row
    return rataan, sb


async def get_normalization_params(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Tuple[float, float, Literal["static", "dynamic"]]:
    """
    Get normalization parameters (rataan, sb) based on current mode.

    Determines which normalization parameters to use:
    - Static mode: Use config.static_rataan and config.static_sb
    - Dynamic mode: Use calculated rataan and sb from TryoutStats
    - Hybrid mode: Use static until threshold reached, then dynamic
      (falling back to static when dynamic stats are not yet available)

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Tuple of (rataan, sb, mode_used)

    Raises:
        ValueError: If tryout not found or dynamic stats unavailable
    """
    mode = await get_normalization_mode(db, website_id, tryout_id)
    if mode == "static":
        # Use static values from config
        rataan, sb = await _fetch_static_params(db, website_id, tryout_id)
        return rataan, sb, "static"
    if mode == "dynamic":
        # Use dynamic values from stats
        rataan, sb = await calculate_dynamic_stats(db, website_id, tryout_id)
        if rataan is None or sb is None:
            raise ValueError(
                f"Dynamic normalization not available for tryout {tryout_id}. "
                "No stats have been calculated yet."
            )
        if sb == 0:
            logger.warning(
                f"Standard deviation is 0 for tryout {tryout_id}. "
                "All NM scores are identical."
            )
        return rataan, sb, "dynamic"
    # Hybrid: dynamic once enough participants exist, static until then.
    if await check_threshold_for_dynamic(db, website_id, tryout_id):
        rataan, sb = await calculate_dynamic_stats(db, website_id, tryout_id)
        if rataan is None or sb is None:
            # Fallback to static if dynamic stats are not available yet.
            rataan, sb = await _fetch_static_params(db, website_id, tryout_id)
            return rataan, sb, "static"
        return rataan, sb, "dynamic"
    rataan, sb = await _fetch_static_params(db, website_id, tryout_id)
    return rataan, sb, "static"
async def calculate_skewness(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Optional[float]:
    """
    Calculate skewness of the NM distribution for validation.

    Skewness measures the asymmetry of the probability distribution:
    - Skewness ≈ 0: Symmetric distribution
    - Skewness > 0: Right-skewed (tail to the right)
    - Skewness < 0: Left-skewed (tail to the left)

    NOTE: a real skewness estimate needs the individual NM values (or at
    least the running third moment Σ NM³), which TryoutStats does not store.
    Until that is tracked, this returns None ("unknown") except for the
    degenerate all-identical case. (Cleaned up: the previous version also
    computed unused n/mean locals before returning None.)

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        0.0 when all scores are identical, otherwise None (insufficient
        data, or skewness not computable from the stored aggregates)
    """
    result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    stats = result.scalar_one_or_none()
    if stats is None or stats.participant_count < 3:
        # Need at least 3 samples for skewness calculation
        return None
    if stats.sb == 0:
        return 0.0  # All values are identical -> perfectly symmetric
    # Individual NM values (or a third-moment running sum) are required for
    # skewness; report "unknown" rather than a wrong number.
    return None
async def validate_dynamic_normalization(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    target_mean: float = 500.0,
    target_sd: float = 100.0,
    mean_tolerance: float = 5.0,
    sd_tolerance: float = 5.0,
) -> Tuple[bool, dict]:
    """
    Validate that dynamic normalization produces the expected distribution.

    Checks whether the tryout's calculated rataan and sb fall within the
    given tolerances of the target mean/SD, and attaches human-readable
    warnings and suggestions for out-of-tolerance cases.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        target_mean: Target mean (default: 500)
        target_sd: Target standard deviation (default: 100)
        mean_tolerance: Allowed deviation from target mean (default: 5)
        sd_tolerance: Allowed deviation from target SD (default: 5)

    Returns:
        Tuple of (is_valid, validation_details) where validation_details
        contains:
            - participant_count: Number of participants
            - current_rataan: Current mean
            - current_sb: Current standard deviation
            - mean_deviation: Absolute deviation from target mean
            - sd_deviation: Absolute deviation from target SD
            - mean_within_tolerance: True if mean deviation <= mean_tolerance
            - sd_within_tolerance: True if SD deviation <= sd_tolerance
            - warnings: List of warning messages
            - suggestions: List of suggestions
    """
    # Get current stats
    result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    stats = result.scalar_one_or_none()
    if stats is None or stats.rataan is None or stats.sb is None:
        # No data yet: report invalid with an explanatory payload rather
        # than raising, so callers can surface this state in the UI.
        return False, {
            "participant_count": 0,
            "current_rataan": None,
            "current_sb": None,
            "mean_deviation": None,
            "sd_deviation": None,
            "mean_within_tolerance": False,
            "sd_within_tolerance": False,
            "warnings": ["No statistics available for validation"],
            "suggestions": ["Wait for more participants to complete sessions"],
        }
    # Calculate absolute deviations from the targets
    mean_deviation = abs(stats.rataan - target_mean)
    sd_deviation = abs(stats.sb - target_sd)
    # Tolerance checks are inclusive (<=)
    mean_within_tolerance = mean_deviation <= mean_tolerance
    sd_within_tolerance = sd_deviation <= sd_tolerance
    is_valid = mean_within_tolerance and sd_within_tolerance
    # Generate warnings
    warnings = []
    suggestions = []
    if not mean_within_tolerance:
        warnings.append(f"Mean deviation ({mean_deviation:.2f}) exceeds tolerance ({mean_tolerance})")
        if stats.rataan > target_mean:
            suggestions.append("Distribution may be right-skewed - consider checking question difficulty")
        else:
            suggestions.append("Distribution may be left-skewed - consider checking question difficulty")
    if not sd_within_tolerance:
        warnings.append(f"SD deviation ({sd_deviation:.2f}) exceeds tolerance ({sd_tolerance})")
        if stats.sb < target_sd:
            suggestions.append("SD too low - scores may be too tightly clustered")
        else:
            suggestions.append("SD too high - scores may have too much variance")
    # Check for skewness.
    # NOTE(review): calculate_skewness currently only ever returns None or
    # 0.0, so this warning cannot fire until skewness is actually computed.
    skewness = await calculate_skewness(db, website_id, tryout_id)
    if skewness is not None and abs(skewness) > 0.5:
        warnings.append(f"Distribution skewness ({skewness:.2f}) > 0.5 - distribution may be asymmetric")
        suggestions.append("Consider using static normalization if dynamic normalization is unstable")
    # Check participant count (advisory only; does not affect is_valid)
    if stats.participant_count < 100:
        suggestions.append(f"Participant count ({stats.participant_count}) below recommended minimum (100)")
    return is_valid, {
        "participant_count": stats.participant_count,
        "current_rataan": stats.rataan,
        "current_sb": stats.sb,
        "mean_deviation": mean_deviation,
        "sd_deviation": sd_deviation,
        "mean_within_tolerance": mean_within_tolerance,
        "sd_within_tolerance": sd_within_tolerance,
        "warnings": warnings,
        "suggestions": suggestions,
    }

1449
app/services/reporting.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,456 @@
"""
WordPress Authentication and User Synchronization Service.
Handles:
- JWT token validation via WordPress REST API
- User synchronization from WordPress to local database
- Multi-site support via website_id isolation
"""
import logging
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Optional
import httpx
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import get_settings
from app.models.user import User
from app.models.website import Website
logger = logging.getLogger(__name__)
settings = get_settings()
# Custom exceptions for WordPress integration
class WordPressAuthError(Exception):
    """Root of the WordPress-integration exception hierarchy."""
class WordPressTokenInvalidError(WordPressAuthError):
    """The supplied WordPress JWT token is invalid or has expired."""
class WordPressAPIError(WordPressAuthError):
    """The WordPress REST API is unreachable or returned an error."""
class WordPressRateLimitError(WordPressAuthError):
    """The WordPress API rate limit (HTTP 429) was exceeded."""
class WebsiteNotFoundError(WordPressAuthError):
    """The requested website_id does not exist in the local database."""
@dataclass
class WordPressUserInfo:
    """Data class for WordPress user information (from /wp/v2/users/me)."""
    # WordPress user ID, normalized to str by the caller.
    wp_user_id: str
    # WordPress login name; empty string when the API response omits it.
    username: str
    # Account email address; empty string when omitted by the API.
    email: str
    # Human-readable display name (the API's "name" field).
    display_name: str
    # WordPress role slugs assigned to the user.
    roles: list[str]
    # Full, unmodified JSON payload returned by the WordPress API.
    raw_data: dict[str, Any]
@dataclass
class SyncStats:
    """Data class for user synchronization statistics."""
    # Users newly created in the local database during the sync.
    inserted: int
    # Existing local users whose updated_at timestamp was refreshed.
    updated: int
    # inserted + updated (errored users are not counted here).
    total: int
    # Users skipped due to a missing ID or a per-user sync failure.
    errors: int
async def get_wordpress_api_base(website: Website) -> str:
    """
    Build the WordPress REST API base URL for a website.

    Args:
        website: Website model instance

    Returns:
        The site's `/wp-json` REST API root, derived from the website's
        configured site_url with any trailing slashes removed.
    """
    # Normalize the configured URL so we never emit a double slash.
    root = website.site_url
    while root.endswith('/'):
        root = root[:-1]
    return f"{root}/wp-json"
async def verify_wordpress_token(
    token: str,
    website_id: int,
    wp_user_id: str,
    db: AsyncSession,
) -> Optional[WordPressUserInfo]:
    """
    Verify a WordPress JWT token and validate the caller's identity.

    Calls WordPress REST API GET /wp/v2/users/me with an Authorization
    header, verifies the response's user ID matches wp_user_id, and
    verifies website_id exists in the local database first.

    NOTE(review): /users/me is called without context=edit here, so the
    response may omit username/email/roles depending on the WP setup —
    the .get(..., default) calls below tolerate that; confirm whether
    callers rely on those fields being populated.

    Args:
        token: WordPress JWT authentication token
        website_id: Website identifier for multi-site isolation
        wp_user_id: Expected WordPress user ID to verify
        db: Async database session

    Returns:
        WordPressUserInfo if valid; None on a user-ID mismatch

    Raises:
        WebsiteNotFoundError: If website_id doesn't exist
        WordPressTokenInvalidError: If token is invalid (HTTP 401)
        WordPressAPIError: If the API is unreachable or errors out
        WordPressRateLimitError: If rate limited (HTTP 429)
    """
    # Verify website exists before making any network call.
    # NOTE(review): duplicates verify_website_exists below — consider reusing it.
    website_result = await db.execute(
        select(Website).where(Website.id == website_id)
    )
    website = website_result.scalar_one_or_none()
    if website is None:
        raise WebsiteNotFoundError(f"Website {website_id} not found")
    api_base = await get_wordpress_api_base(website)
    url = f"{api_base}/wp/v2/users/me"
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
    }
    # Short timeouts: this sits on the interactive auth path.
    timeout = httpx.Timeout(10.0, connect=5.0)
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.get(url, headers=headers)
            # Map well-known status codes to specific exception types so
            # callers can distinguish auth failure / throttling / outage.
            if response.status_code == 401:
                raise WordPressTokenInvalidError("Invalid or expired WordPress token")
            if response.status_code == 429:
                raise WordPressRateLimitError("WordPress API rate limit exceeded")
            if response.status_code == 503:
                raise WordPressAPIError("WordPress API service unavailable")
            if response.status_code != 200:
                raise WordPressAPIError(
                    f"WordPress API error: {response.status_code} - {response.text}"
                )
            data = response.json()
            # Verify the authenticated user is the one the caller claimed.
            response_user_id = str(data.get("id", ""))
            if response_user_id != str(wp_user_id):
                # Mismatch is a soft failure (returns None), not an exception.
                logger.warning(
                    f"User ID mismatch: expected {wp_user_id}, got {response_user_id}"
                )
                return None
            # Extract user info
            user_info = WordPressUserInfo(
                wp_user_id=response_user_id,
                username=data.get("username", ""),
                email=data.get("email", ""),
                display_name=data.get("name", ""),
                roles=data.get("roles", []),
                raw_data=data,
            )
            return user_info
    except httpx.TimeoutException:
        raise WordPressAPIError("WordPress API request timed out")
    except httpx.ConnectError:
        raise WordPressAPIError("Unable to connect to WordPress API")
    except httpx.HTTPError as e:
        raise WordPressAPIError(f"HTTP error communicating with WordPress: {str(e)}")
async def fetch_wordpress_users(
    website: Website,
    admin_token: str,
    page: int = 1,
    per_page: int = 100,
) -> list[dict[str, Any]]:
    """
    Fetch one page of users from the WordPress REST API.

    Requires an admin-level JWT token; calls GET /wp/v2/users with
    context=edit so full user records are returned.

    Args:
        website: Website model instance
        admin_token: WordPress admin JWT token
        page: Page number for pagination
        per_page: Number of users per page (capped at 100)

    Returns:
        List of WordPress user data dictionaries

    Raises:
        WordPressTokenInvalidError: If the admin token is invalid (401) or
            lacks permission to list users (403)
        WordPressRateLimitError: If rate limited (429)
        WordPressAPIError: If the API is unreachable or errors out
    """
    endpoint = f"{await get_wordpress_api_base(website)}/wp/v2/users"
    request_headers = {
        "Authorization": f"Bearer {admin_token}",
        "Accept": "application/json",
    }
    query_params = {
        "page": page,
        "per_page": min(per_page, 100),
        "context": "edit",  # Get full user data
    }
    # Generous timeouts: bulk sync is a background concern, not interactive.
    try:
        async with httpx.AsyncClient(
            timeout=httpx.Timeout(30.0, connect=10.0)
        ) as client:
            response = await client.get(
                endpoint, headers=request_headers, params=query_params
            )
    except httpx.TimeoutException:
        raise WordPressAPIError("WordPress API request timed out")
    except httpx.ConnectError:
        raise WordPressAPIError("Unable to connect to WordPress API")
    except httpx.HTTPError as e:
        raise WordPressAPIError(f"HTTP error communicating with WordPress: {str(e)}")
    # Translate well-known status codes into typed exceptions.
    status = response.status_code
    if status == 401:
        raise WordPressTokenInvalidError("Invalid admin token for user sync")
    if status == 403:
        raise WordPressTokenInvalidError(
            "Admin token lacks permission to list users"
        )
    if status == 429:
        raise WordPressRateLimitError("WordPress API rate limit exceeded")
    if status == 503:
        raise WordPressAPIError("WordPress API service unavailable")
    if status != 200:
        raise WordPressAPIError(
            f"WordPress API error: {status} - {response.text}"
        )
    return response.json()
async def sync_wordpress_users(
    website_id: int,
    admin_token: str,
    db: AsyncSession,
) -> SyncStats:
    """
    Synchronize users from WordPress to the local database.

    Fetches all users from the WordPress API (paginated) and performs an
    upsert keyed on (wp_user_id, website_id):
    - Existing users get their updated_at timestamp refreshed
    - Unknown users are inserted

    NOTE(review): updates only touch updated_at — no WordPress profile
    fields are mirrored into the local User row; presumably the local model
    stores only the ID mapping. Confirm against the User model.

    Args:
        website_id: Website identifier for multi-site isolation
        admin_token: WordPress admin JWT token
        db: Async database session

    Returns:
        SyncStats with insertion/update/error counts
        (total = inserted + updated; errors are counted separately)

    Raises:
        WebsiteNotFoundError: If website_id doesn't exist
        WordPressTokenInvalidError: If admin token is invalid
        WordPressAPIError: If API is unreachable
    """
    # Verify website exists
    website_result = await db.execute(
        select(Website).where(Website.id == website_id)
    )
    website = website_result.scalar_one_or_none()
    if website is None:
        raise WebsiteNotFoundError(f"Website {website_id} not found")
    # Load all existing users for this website once, keyed by wp_user_id,
    # so the per-user loop below needs no additional queries.
    existing_users_result = await db.execute(
        select(User).where(User.website_id == website_id)
    )
    existing_users = {
        str(user.wp_user_id): user
        for user in existing_users_result.scalars().all()
    }
    # Fetch users from WordPress (with pagination)
    all_wp_users = []
    page = 1
    per_page = 100
    while True:
        wp_users = await fetch_wordpress_users(
            website, admin_token, page, per_page
        )
        if not wp_users:
            break
        all_wp_users.extend(wp_users)
        # A short page means this was the last one — stop paginating.
        if len(wp_users) < per_page:
            break
        page += 1
    # Sync users; per-user failures are counted, never fatal.
    inserted = 0
    updated = 0
    errors = 0
    for wp_user in all_wp_users:
        try:
            wp_user_id = str(wp_user.get("id", ""))
            if not wp_user_id:
                # Record without an ID cannot be keyed — skip it.
                errors += 1
                continue
            if wp_user_id in existing_users:
                # Update existing user (timestamp update only; see NOTE above)
                existing_user = existing_users[wp_user_id]
                existing_user.updated_at = datetime.now(timezone.utc)
                updated += 1
            else:
                # Insert new user
                new_user = User(
                    wp_user_id=wp_user_id,
                    website_id=website_id,
                    created_at=datetime.now(timezone.utc),
                    updated_at=datetime.now(timezone.utc),
                )
                db.add(new_user)
                inserted += 1
        except Exception as e:
            logger.error(f"Error syncing user {wp_user.get('id')}: {e}")
            errors += 1
    # One commit for the whole sync batch.
    await db.commit()
    total = inserted + updated
    logger.info(
        f"WordPress user sync complete for website {website_id}: "
        f"{inserted} inserted, {updated} updated, {errors} errors"
    )
    return SyncStats(
        inserted=inserted,
        updated=updated,
        total=total,
        errors=errors,
    )
async def get_wordpress_user(
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
) -> Optional[User]:
    """
    Look up a locally-stored user by WordPress user ID within one website.

    Args:
        wp_user_id: WordPress user ID
        website_id: Website identifier for multi-site isolation
        db: Async database session

    Returns:
        The matching User, or None when no such user exists locally.
    """
    query = select(User).where(
        User.wp_user_id == wp_user_id,
        User.website_id == website_id,
    )
    return (await db.execute(query)).scalar_one_or_none()
async def verify_website_exists(
    website_id: int,
    db: AsyncSession,
) -> Website:
    """
    Load a website row, raising when it is missing.

    Args:
        website_id: Website identifier
        db: Async database session

    Returns:
        Website model instance

    Raises:
        WebsiteNotFoundError: If the website doesn't exist
    """
    site = (
        await db.execute(select(Website).where(Website.id == website_id))
    ).scalar_one_or_none()
    if site is None:
        raise WebsiteNotFoundError(f"Website {website_id} not found")
    return site
async def get_or_create_user(
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
) -> User:
    """
    Return the local user for (wp_user_id, website_id), creating one if absent.

    NOTE(review): the get-then-insert sequence is not atomic; concurrent
    calls could both attempt the insert — presumably a DB unique constraint
    guards this. Confirm against the User table schema.

    Args:
        wp_user_id: WordPress user ID
        website_id: Website identifier
        db: Async database session

    Returns:
        User model instance (existing or freshly created)
    """
    found = await get_wordpress_user(wp_user_id, website_id, db)
    if found is not None:
        return found
    # No local record yet: create, persist, and re-read generated fields.
    created = User(
        wp_user_id=wp_user_id,
        website_id=website_id,
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
    )
    db.add(created)
    await db.commit()
    await db.refresh(created)
    return created