first commit

This commit is contained in:
Dwindi Ramadhana
2026-03-21 23:32:59 +07:00
commit cf193d7ea0
57 changed files with 17871 additions and 0 deletions

155
app/services/__init__.py Normal file
View File

@@ -0,0 +1,155 @@
"""
Services module for IRT Bank Soal.
Contains business logic services for:
- IRT calibration
- CAT selection
- WordPress authentication
- AI question generation
- Reporting
"""
from app.services.irt_calibration import (
IRTCalibrationError,
calculate_fisher_information,
calculate_item_information,
calculate_probability,
calculate_theta_se,
estimate_b_from_ctt_p,
estimate_theta_mle,
get_session_responses,
nn_to_theta,
theta_to_nn,
update_session_theta,
update_theta_after_response,
)
from app.services.cat_selection import (
CATSelectionError,
NextItemResult,
TerminationCheck,
check_user_level_reuse,
get_available_levels_for_slot,
get_next_item,
get_next_item_adaptive,
get_next_item_fixed,
get_next_item_hybrid,
should_terminate,
simulate_cat_selection,
update_theta,
)
from app.services.wordpress_auth import (
WordPressAPIError,
WordPressAuthError,
WordPressRateLimitError,
WordPressTokenInvalidError,
WordPressUserInfo,
WebsiteNotFoundError,
SyncStats,
fetch_wordpress_users,
get_or_create_user,
get_wordpress_user,
sync_wordpress_users,
verify_website_exists,
verify_wordpress_token,
)
from app.services.ai_generation import (
call_openrouter_api,
check_cache_reuse,
generate_question,
generate_with_cache_check,
get_ai_stats,
get_prompt_template,
parse_ai_response,
save_ai_question,
validate_ai_model,
SUPPORTED_MODELS,
)
from app.services.reporting import (
generate_student_performance_report,
generate_item_analysis_report,
generate_calibration_status_report,
generate_tryout_comparison_report,
export_report_to_csv,
export_report_to_excel,
export_report_to_pdf,
schedule_report,
get_scheduled_report,
list_scheduled_reports,
cancel_scheduled_report,
StudentPerformanceReport,
ItemAnalysisReport,
CalibrationStatusReport,
TryoutComparisonReport,
ReportSchedule,
)
# Explicit public API of the services package; keep this list in sync with
# the re-export imports above so `from app.services import *` stays accurate.
__all__ = [
    # IRT Calibration
    "IRTCalibrationError",
    "calculate_fisher_information",
    "calculate_item_information",
    "calculate_probability",
    "calculate_theta_se",
    "estimate_b_from_ctt_p",
    "estimate_theta_mle",
    "get_session_responses",
    "nn_to_theta",
    "theta_to_nn",
    "update_session_theta",
    "update_theta_after_response",
    # CAT Selection
    "CATSelectionError",
    "NextItemResult",
    "TerminationCheck",
    "check_user_level_reuse",
    "get_available_levels_for_slot",
    "get_next_item",
    "get_next_item_adaptive",
    "get_next_item_fixed",
    "get_next_item_hybrid",
    "should_terminate",
    "simulate_cat_selection",
    "update_theta",
    # WordPress Auth
    "WordPressAPIError",
    "WordPressAuthError",
    "WordPressRateLimitError",
    "WordPressTokenInvalidError",
    "WordPressUserInfo",
    "WebsiteNotFoundError",
    "SyncStats",
    "fetch_wordpress_users",
    "get_or_create_user",
    "get_wordpress_user",
    "sync_wordpress_users",
    "verify_website_exists",
    "verify_wordpress_token",
    # AI Generation
    "call_openrouter_api",
    "check_cache_reuse",
    "generate_question",
    "generate_with_cache_check",
    "get_ai_stats",
    "get_prompt_template",
    "parse_ai_response",
    "save_ai_question",
    "validate_ai_model",
    "SUPPORTED_MODELS",
    # Reporting
    "generate_student_performance_report",
    "generate_item_analysis_report",
    "generate_calibration_status_report",
    "generate_tryout_comparison_report",
    "export_report_to_csv",
    "export_report_to_excel",
    "export_report_to_pdf",
    "schedule_report",
    "get_scheduled_report",
    "list_scheduled_reports",
    "cancel_scheduled_report",
    "StudentPerformanceReport",
    "ItemAnalysisReport",
    "CalibrationStatusReport",
    "TryoutComparisonReport",
    "ReportSchedule",
]

View File

@@ -0,0 +1,595 @@
"""
AI Question Generation Service.
Handles OpenRouter API integration for generating question variants.
Implements caching, user-level reuse checking, and prompt engineering.
"""
import asyncio
import json
import logging
import re
from typing import Any, Dict, Literal, Optional, Union

import httpx
from sqlalchemy import and_, func, select
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.config import get_settings
from app.models.item import Item
from app.models.tryout import Tryout
from app.models.user_answer import UserAnswer
from app.schemas.ai import GeneratedQuestion
# Module-level logger and cached application settings.
logger = logging.getLogger(__name__)
settings = get_settings()
# OpenRouter API configuration
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
# Supported AI models: OpenRouter model id -> human-readable display name.
SUPPORTED_MODELS = {
    "qwen/qwen-2.5-coder-32b-instruct": "Qwen 2.5 Coder 32B",
    "meta-llama/llama-3.3-70b-instruct": "Llama 3.3 70B",
}
# Level mapping for prompts: Indonesian difficulty name -> English phrase
# interpolated into the generation prompt.
LEVEL_DESCRIPTIONS = {
    "mudah": "easier (simpler concepts, more straightforward calculations)",
    "sedang": "medium difficulty",
    "sulit": "harder (more complex concepts, multi-step reasoning)",
}
def get_prompt_template(
    basis_stem: str,
    basis_options: Dict[str, str],
    basis_correct: str,
    basis_explanation: Optional[str],
    target_level: Literal["mudah", "sulit"],
) -> str:
    """
    Generate standardized prompt for AI question generation.

    The prompt asks the model for exactly one question at ``target_level``
    difficulty and for a bare JSON reply matching the structure that
    parse_ai_response expects.

    Args:
        basis_stem: The basis question stem
        basis_options: The basis question options
        basis_correct: The basis correct answer
        basis_explanation: The basis explanation
        target_level: Target difficulty level

    Returns:
        Formatted prompt string
    """
    # Fall back to the raw level name if no description entry exists.
    level_desc = LEVEL_DESCRIPTIONS.get(target_level, target_level)
    # One " X: text" line per option, in the dict's insertion order.
    options_text = "\n".join(
        [f" {key}: {value}" for key, value in basis_options.items()]
    )
    explanation_text = (
        f"Explanation: {basis_explanation}"
        if basis_explanation
        else "Explanation: (not provided)"
    )
    # NOTE: the double braces in OUTPUT FORMAT render as literal JSON braces.
    prompt = f"""You are an educational content creator specializing in creating assessment questions.
Given a "Sedang" (medium difficulty) question, generate a new question at a different difficulty level.
BASIS QUESTION (Sedang level):
Question: {basis_stem}
Options:
{options_text}
Correct Answer: {basis_correct}
{explanation_text}
TASK:
Generate 1 new question that is {level_desc} than the basis question above.
REQUIREMENTS:
1. Keep the SAME topic/subject matter as the basis question
2. Use similar context and terminology
3. Create exactly 4 answer options (A, B, C, D)
4. Only ONE correct answer
5. Include a clear explanation of why the correct answer is correct
6. Make the question noticeably {level_desc} - not just a minor variation
OUTPUT FORMAT:
Return ONLY a valid JSON object with this exact structure (no markdown, no code blocks):
{{"stem": "Your question text here", "options": {{"A": "Option A text", "B": "Option B text", "C": "Option C text", "D": "Option D text"}}, "correct": "A", "explanation": "Explanation text here"}}
Remember: The correct field must be exactly "A", "B", "C", or "D"."""
    return prompt
def parse_ai_response(response_text: str) -> Optional[GeneratedQuestion]:
    """
    Extract a GeneratedQuestion from raw AI output.

    Tolerates several response shapes: fenced ```json blocks, generic
    fenced blocks, a bare ``{...}`` object embedded in text, or a response
    that is already plain JSON.

    Args:
        response_text: Raw AI response text

    Returns:
        GeneratedQuestion if parsing successful, None otherwise
    """
    if not response_text:
        return None
    cleaned = response_text.strip()
    # Extraction strategies, most specific first.
    json_patterns = [
        r"```json\s*([\s\S]*?)\s*```",  # ```json ... ```
        r"```\s*([\s\S]*?)\s*```",  # ``` ... ```
        r"(\{[\s\S]*\})",  # Raw JSON object
    ]
    for pattern in json_patterns:
        found = re.search(pattern, cleaned)
        if not found:
            continue
        try:
            payload = json.loads(found.group(1).strip())
        except json.JSONDecodeError:
            continue
        return validate_and_create_question(payload)
    # Last resort: the whole response might already be bare JSON.
    try:
        payload = json.loads(cleaned)
    except json.JSONDecodeError:
        logger.warning(f"Failed to parse AI response: {cleaned[:200]}...")
        return None
    return validate_and_create_question(payload)
def validate_and_create_question(data: Dict[str, Any]) -> Optional[GeneratedQuestion]:
    """
    Validate parsed JSON payload and build a GeneratedQuestion from it.

    Rejects (with a warning logged) payloads missing required fields,
    payloads whose options are malformed or incomplete, and correct
    answers outside A-D.

    Args:
        data: Parsed JSON data

    Returns:
        GeneratedQuestion if valid, None otherwise
    """
    for required in ("stem", "options", "correct"):
        if required not in data:
            logger.warning(f"Missing required fields in AI response: {data.keys()}")
            return None
    options = data.get("options", {})
    if not isinstance(options, dict):
        logger.warning("Options is not a dictionary")
        return None
    required_options = {"A", "B", "C", "D"}
    if required_options - set(options.keys()):
        logger.warning(f"Missing required options: {required_options - set(options.keys())}")
        return None
    # Normalize the answer key to upper case before checking it.
    correct = str(data.get("correct", "")).upper()
    if correct not in required_options:
        logger.warning(f"Invalid correct answer: {correct}")
        return None
    explanation = str(data.get("explanation", "")).strip() or None
    return GeneratedQuestion(
        stem=str(data["stem"]).strip(),
        options={key: str(text).strip() for key, text in options.items()},
        correct=correct,
        explanation=explanation,
    )
async def call_openrouter_api(
    prompt: str,
    model: str,
    max_retries: int = 3,
) -> Optional[str]:
    """
    Call OpenRouter API to generate question text.

    Retries on HTTP 429 (with exponential backoff: 1s, 2s, 4s, ...),
    timeouts, and transport errors; other HTTP errors fail immediately.

    Fixes over the previous version: the ``import asyncio`` inside the retry
    loop is hoisted to module level, and a single AsyncClient is reused
    across attempts instead of reconnecting per retry.

    Args:
        prompt: The prompt to send
        model: AI model to use
        max_retries: Maximum retry attempts

    Returns:
        API response text or None if failed
    """
    if not settings.OPENROUTER_API_KEY:
        logger.error("OPENROUTER_API_KEY not configured")
        return None
    if model not in SUPPORTED_MODELS:
        logger.error(f"Unsupported AI model: {model}")
        return None
    headers = {
        "Authorization": f"Bearer {settings.OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        # OpenRouter attribution headers.
        "HTTP-Referer": "https://github.com/irt-bank-soal",
        "X-Title": "IRT Bank Soal",
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": prompt,
            }
        ],
        "max_tokens": 2000,
        "temperature": 0.7,
    }
    timeout = httpx.Timeout(settings.OPENROUTER_TIMEOUT)
    # Reuse one client (and its connection pool) across all attempts.
    async with httpx.AsyncClient(timeout=timeout) as client:
        for attempt in range(max_retries):
            try:
                response = await client.post(
                    OPENROUTER_API_URL,
                    headers=headers,
                    json=payload,
                )
            except httpx.TimeoutException:
                logger.warning(f"OpenRouter timeout, attempt {attempt + 1}/{max_retries}")
                if attempt < max_retries - 1:
                    continue
                return None
            except Exception as e:
                logger.error(f"OpenRouter API call failed: {e}")
                if attempt < max_retries - 1:
                    continue
                return None
            if response.status_code == 200:
                choices = response.json().get("choices", [])
                if choices:
                    return choices[0].get("message", {}).get("content")
                logger.warning("No choices in OpenRouter response")
                return None
            if response.status_code == 429:
                # Rate limited - wait and retry with exponential backoff.
                logger.warning(f"Rate limited, attempt {attempt + 1}/{max_retries}")
                if attempt < max_retries - 1:
                    await asyncio.sleep(2 ** attempt)
                    continue
                return None
            # Any other HTTP status is treated as a hard failure.
            logger.error(
                f"OpenRouter API error: {response.status_code} - {response.text}"
            )
            return None
    return None
async def generate_question(
    basis_item: Item,
    target_level: Literal["mudah", "sulit"],
    ai_model: str = "qwen/qwen-2.5-coder-32b-instruct",
) -> Optional[GeneratedQuestion]:
    """
    Generate a new question variant from a basis item.

    Builds the prompt from the basis item's fields, calls OpenRouter, and
    parses the model's reply.

    Args:
        basis_item: The basis item (must be sedang level)
        target_level: Target difficulty level
        ai_model: AI model to use

    Returns:
        GeneratedQuestion if successful, None otherwise
    """
    prompt = get_prompt_template(
        basis_stem=basis_item.stem,
        basis_options=basis_item.options,
        basis_correct=basis_item.correct_answer,
        basis_explanation=basis_item.explanation,
        target_level=target_level,
    )
    raw_response = await call_openrouter_api(prompt, ai_model)
    if not raw_response:
        logger.error("No response from OpenRouter API")
        return None
    parsed = parse_ai_response(raw_response)
    if not parsed:
        logger.error("Failed to parse AI response")
        return None
    return parsed
async def check_cache_reuse(
    tryout_id: str,
    slot: int,
    level: str,
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
) -> Optional[Item]:
    """
    Return a cached item at (tryout, slot, level) the user has not answered.

    Looks up existing items for the slot/level and returns the first one
    with no recorded answer from this user; None when every candidate has
    already been answered (or none exist).

    Args:
        tryout_id: Tryout identifier
        slot: Question slot
        level: Difficulty level
        wp_user_id: WordPress user ID
        website_id: Website identifier
        db: Database session

    Returns:
        Cached item if found and user hasn't answered, None otherwise
    """
    candidates = (
        await db.execute(
            select(Item).where(
                and_(
                    Item.tryout_id == tryout_id,
                    Item.website_id == website_id,
                    Item.slot == slot,
                    Item.level == level,
                )
            )
        )
    ).scalars().all()
    if not candidates:
        return None
    # One lookup per candidate; slot/level pools are small in practice.
    for candidate in candidates:
        prior_answer = (
            await db.execute(
                select(UserAnswer).where(
                    and_(
                        UserAnswer.item_id == candidate.id,
                        UserAnswer.wp_user_id == wp_user_id,
                    )
                )
            )
        ).scalar_one_or_none()
        if prior_answer is None:
            logger.info(
                f"Cache hit for tryout={tryout_id}, slot={slot}, level={level}, "
                f"item_id={candidate.id}, user={wp_user_id}"
            )
            return candidate
    logger.info(
        f"Cache miss (user answered all) for tryout={tryout_id}, slot={slot}, "
        f"level={level}, user={wp_user_id}"
    )
    return None
async def generate_with_cache_check(
    tryout_id: str,
    slot: int,
    level: Literal["mudah", "sulit"],
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
    ai_model: str = "qwen/qwen-2.5-coder-32b-instruct",
) -> tuple[Optional[Union[Item, GeneratedQuestion]], bool]:
    """
    Generate question with cache checking.

    Order of operations:
    1. Look for a cached item at (tryout, slot, level) the user has not
       answered yet; cache hits are served regardless of the AI toggle.
    2. On cache miss, stop if AI generation is disabled for the tryout.
    3. Otherwise generate a new question from the "sedang" basis item at
       the same slot.

    (This removes the duplicated cache-lookup block the previous version
    had in its AI-disabled branch; results are unchanged.)

    Args:
        tryout_id: Tryout identifier
        slot: Question slot
        level: Target difficulty level
        wp_user_id: WordPress user ID
        website_id: Website identifier
        db: Database session
        ai_model: AI model to use

    Returns:
        Tuple of (item/question or None, is_cached)
    """
    # Cache is consulted first: a cached item is valid whether or not AI
    # generation is enabled for the tryout.
    cached_item = await check_cache_reuse(
        tryout_id, slot, level, wp_user_id, website_id, db
    )
    if cached_item:
        return cached_item, True
    # Cache miss - only generate when the tryout allows AI generation.
    tryout_result = await db.execute(
        select(Tryout).where(
            and_(
                Tryout.tryout_id == tryout_id,
                Tryout.website_id == website_id,
            )
        )
    )
    tryout = tryout_result.scalar_one_or_none()
    if tryout and not tryout.ai_generation_enabled:
        logger.info(f"AI generation disabled for tryout={tryout_id}")
        return None, False
    # Find the "sedang" basis item at the same slot to derive from.
    basis_result = await db.execute(
        select(Item).where(
            and_(
                Item.tryout_id == tryout_id,
                Item.website_id == website_id,
                Item.slot == slot,
                Item.level == "sedang",
            )
        ).limit(1)
    )
    basis_item = basis_result.scalar_one_or_none()
    if not basis_item:
        logger.error(
            f"No basis item found for tryout={tryout_id}, slot={slot}"
        )
        return None, False
    # Generate a brand-new question via the AI model.
    generated = await generate_question(basis_item, level, ai_model)
    if not generated:
        logger.error(
            f"Failed to generate question for tryout={tryout_id}, slot={slot}, level={level}"
        )
        return None, False
    return generated, False
async def save_ai_question(
    generated_data: GeneratedQuestion,
    tryout_id: str,
    website_id: int,
    basis_item_id: int,
    slot: int,
    level: Literal["mudah", "sedang", "sulit"],
    ai_model: str,
    db: AsyncSession,
) -> Optional[int]:
    """
    Save AI-generated question to database.

    The row is flushed (not committed) so its primary key becomes
    available; committing is presumably the caller's responsibility —
    TODO confirm against the calling endpoint.

    Args:
        generated_data: Generated question data
        tryout_id: Tryout identifier
        website_id: Website identifier
        basis_item_id: Basis item ID
        slot: Question slot
        level: Difficulty level
        ai_model: AI model used
        db: Database session

    Returns:
        Created item ID or None if failed
    """
    try:
        new_item = Item(
            tryout_id=tryout_id,
            website_id=website_id,
            slot=slot,
            level=level,
            stem=generated_data.stem,
            options=generated_data.options,
            correct_answer=generated_data.correct,
            explanation=generated_data.explanation,
            # Provenance: mark as AI-generated, record model and basis item.
            generated_by="ai",
            ai_model=ai_model,
            basis_item_id=basis_item_id,
            # New AI items start uncalibrated with empty CTT/IRT statistics.
            calibrated=False,
            ctt_p=None,
            ctt_bobot=None,
            ctt_category=None,
            irt_b=None,
            irt_se=None,
            calibration_sample_size=0,
        )
        db.add(new_item)
        await db.flush()  # Get the ID without committing
        logger.info(
            f"Saved AI-generated item: id={new_item.id}, tryout={tryout_id}, "
            f"slot={slot}, level={level}, model={ai_model}"
        )
        return new_item.id
    except Exception as e:
        # Deliberate best-effort: failures are logged and None returned so
        # callers can degrade instead of crashing mid-session.
        logger.error(f"Failed to save AI-generated question: {e}")
        return None
async def get_ai_stats(db: AsyncSession) -> Dict[str, Any]:
    """
    Aggregate statistics about AI-generated items.

    Args:
        db: Database session

    Returns:
        Statistics dictionary
    """
    ai_filter = Item.generated_by == "ai"
    # Overall count of AI-generated items.
    total_ai_items = (
        await db.execute(select(func.count(Item.id)).where(ai_filter))
    ).scalar() or 0
    # Per-model breakdown (rows are (model, count) pairs).
    per_model_rows = await db.execute(
        select(Item.ai_model, func.count(Item.id))
        .where(ai_filter)
        .where(Item.ai_model.isnot(None))
        .group_by(Item.ai_model)
    )
    items_by_model = dict(per_model_rows.all())
    # Cache hit rate is not tracked yet; the zeros below are placeholders.
    return {
        "total_ai_items": total_ai_items,
        "items_by_model": items_by_model,
        "cache_hit_rate": 0.0,
        "total_cache_hits": 0,
        "total_requests": 0,
    }
def validate_ai_model(model: str) -> bool:
    """
    Check whether an AI model identifier is in the supported set.

    Args:
        model: AI model identifier

    Returns:
        True if model is supported
    """
    return model in SUPPORTED_MODELS

View File

@@ -0,0 +1,702 @@
"""
CAT (Computerized Adaptive Testing) Selection Service.
Implements adaptive item selection algorithms for IRT-based testing.
Supports three modes: CTT (fixed), IRT (adaptive), and hybrid.
"""
import math
from dataclasses import dataclass
from datetime import datetime
from typing import Literal, Optional
from sqlalchemy import and_, not_, or_, select, func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.models import Item, Session, Tryout, UserAnswer
from app.services.irt_calibration import (
calculate_item_information,
estimate_b_from_ctt_p,
estimate_theta_mle,
update_theta_after_response,
)
class CATSelectionError(Exception):
    """Raised when CAT item selection cannot proceed.

    Examples: unknown session id, or an unrecognized selection mode.
    """
@dataclass
class NextItemResult:
    """Result of next item selection."""
    # The chosen item, or None when no suitable item remains.
    item: Optional[Item]
    selection_method: str  # 'fixed', 'adaptive', 'hybrid'
    # Slot and level of the chosen item (both None when item is None).
    slot: Optional[int]
    level: Optional[str]
    reason: str  # Why this item was selected
@dataclass
class TerminationCheck:
    """Result of termination condition check."""
    should_terminate: bool  # True when the session should stop serving items
    reason: str  # Human-readable summary of the conditions that fired
    items_answered: int  # Number of answers recorded for the session
    current_se: Optional[float]  # Latest theta standard error (None if unset)
    max_items: Optional[int]  # Configured item cap (None = unlimited)
    se_threshold_met: bool  # SE below threshold AND minimum item count reached
# Default SE threshold for termination: stop once theta's standard error
# drops below this value (in theta units).
DEFAULT_SE_THRESHOLD = 0.5
# Default max items if not configured — safety cap on test length.
DEFAULT_MAX_ITEMS = 50
async def get_next_item_fixed(
    db: AsyncSession,
    session_id: str,
    tryout_id: str,
    website_id: int,
    level_filter: Optional[str] = None
) -> NextItemResult:
    """
    Serve the next item in fixed slot order (CTT mode).

    Items are ordered by (slot, level); items the session already answered
    are excluded, as are items outside ``level_filter`` when given.

    Args:
        db: Database session
        session_id: Session identifier
        tryout_id: Tryout identifier
        website_id: Website identifier
        level_filter: Optional difficulty level filter ('mudah', 'sedang', 'sulit')

    Returns:
        NextItemResult with selected item or None if no more items

    Raises:
        CATSelectionError: If the session does not exist.
    """
    # The session must exist before items can be served for it.
    session = (
        await db.execute(select(Session).where(Session.session_id == session_id))
    ).scalar_one_or_none()
    if session is None:
        raise CATSelectionError(f"Session {session_id} not found")
    # Items this session has already answered are never re-served.
    answered_rows = await db.execute(
        select(UserAnswer.item_id).where(UserAnswer.session_id == session_id)
    )
    seen_ids = [row[0] for row in answered_rows.all()]
    stmt = (
        select(Item)
        .where(
            Item.tryout_id == tryout_id,
            Item.website_id == website_id
        )
        .order_by(Item.slot, Item.level)
    )
    if level_filter:
        stmt = stmt.where(Item.level == level_filter)
    if seen_ids:
        stmt = stmt.where(not_(Item.id.in_(seen_ids)))
    candidates = (await db.execute(stmt)).scalars().all()
    if not candidates:
        return NextItemResult(
            item=None,
            selection_method="fixed",
            slot=None,
            level=None,
            reason="No more items available"
        )
    # Lowest-slot item comes first thanks to the ORDER BY.
    chosen = candidates[0]
    return NextItemResult(
        item=chosen,
        selection_method="fixed",
        slot=chosen.slot,
        level=chosen.level,
        reason=f"Fixed order selection - slot {chosen.slot}"
    )
async def get_next_item_adaptive(
    db: AsyncSession,
    session_id: str,
    tryout_id: str,
    website_id: int,
    ai_generation_enabled: bool = False,
    level_filter: Optional[str] = None
) -> NextItemResult:
    """
    Select the next item adaptively (IRT mode).

    Considers only calibrated items and picks the one whose difficulty b is
    closest to the session's current theta, with a small bonus for higher
    item information at that theta. AI-generated items are eligible only
    when AI generation is enabled.

    Args:
        db: Database session
        session_id: Session identifier
        tryout_id: Tryout identifier
        website_id: Website identifier
        ai_generation_enabled: Whether to include AI-generated items
        level_filter: Optional difficulty level filter

    Returns:
        NextItemResult with selected item or None if no suitable items

    Raises:
        CATSelectionError: If the session does not exist.
    """
    session = (
        await db.execute(select(Session).where(Session.session_id == session_id))
    ).scalar_one_or_none()
    if session is None:
        raise CATSelectionError(f"Session {session_id} not found")
    # First item: no responses yet, start from the average-ability prior.
    current_theta = 0.0 if session.theta is None else session.theta
    answered_rows = await db.execute(
        select(UserAnswer.item_id).where(UserAnswer.session_id == session_id)
    )
    seen_ids = [row[0] for row in answered_rows.all()]
    stmt = select(Item).where(
        Item.tryout_id == tryout_id,
        Item.website_id == website_id,
        Item.calibrated == True  # noqa: E712 - SQL boolean expression
    )
    if level_filter:
        stmt = stmt.where(Item.level == level_filter)
    if seen_ids:
        stmt = stmt.where(not_(Item.id.in_(seen_ids)))
    if not ai_generation_enabled:
        stmt = stmt.where(Item.generated_by == 'manual')
    candidates = (await db.execute(stmt)).scalars().all()
    if not candidates:
        return NextItemResult(
            item=None,
            selection_method="adaptive",
            slot=None,
            level=None,
            reason="No calibrated items available"
        )
    # Score = |b - theta| - 0.1 * information(theta, b); lower is better.
    # min() resolves ties to the earliest candidate, matching a linear scan
    # that only replaces on a strictly lower score.
    scored = [
        (
            abs(candidate.irt_b - current_theta)
            - (0.1 * calculate_item_information(current_theta, candidate.irt_b)),
            candidate,
        )
        for candidate in candidates
        if candidate.irt_b is not None  # guard: b should exist when calibrated
    ]
    if not scored:
        return NextItemResult(
            item=None,
            selection_method="adaptive",
            slot=None,
            level=None,
            reason="No items with valid IRT parameters available"
        )
    _, best_item = min(scored, key=lambda pair: pair[0])
    return NextItemResult(
        item=best_item,
        selection_method="adaptive",
        slot=best_item.slot,
        level=best_item.level,
        reason=f"Adaptive selection - b={best_item.irt_b:.3f} ≈ θ={current_theta:.3f}"
    )
async def get_next_item_hybrid(
    db: AsyncSession,
    session_id: str,
    tryout_id: str,
    website_id: int,
    hybrid_transition_slot: int = 10,
    ai_generation_enabled: bool = False,
    level_filter: Optional[str] = None
) -> NextItemResult:
    """
    Select the next item in hybrid mode.

    Serves the first ``hybrid_transition_slot`` items in fixed order, then
    switches to adaptive selection; if no calibrated items remain, falls
    back to fixed (CTT) order.

    Args:
        db: Database session
        session_id: Session identifier
        tryout_id: Tryout identifier
        website_id: Website identifier
        hybrid_transition_slot: Slot number to transition from fixed to adaptive
        ai_generation_enabled: Whether to include AI-generated items
        level_filter: Optional difficulty level filter

    Returns:
        NextItemResult with selected item or None if no items available

    Raises:
        CATSelectionError: If the session does not exist.
    """
    session = (
        await db.execute(select(Session).where(Session.session_id == session_id))
    ).scalar_one_or_none()
    if session is None:
        raise CATSelectionError(f"Session {session_id} not found")
    # Position in the test = number of answers recorded so far.
    answered_count = (
        await db.execute(
            select(func.count(UserAnswer.id)).where(
                UserAnswer.session_id == session_id
            )
        )
    ).scalar() or 0
    current_slot = answered_count + 1
    # Phase 1: fixed order until the transition slot is passed.
    if current_slot <= hybrid_transition_slot:
        fixed_phase = await get_next_item_fixed(
            db, session_id, tryout_id, website_id, level_filter
        )
        fixed_phase.selection_method = "hybrid_fixed"
        fixed_phase.reason = f"Hybrid mode (fixed phase) - slot {current_slot}"
        return fixed_phase
    # Phase 2: adaptive selection.
    adaptive_result = await get_next_item_adaptive(
        db, session_id, tryout_id, website_id, ai_generation_enabled, level_filter
    )
    if adaptive_result.item is not None:
        adaptive_result.selection_method = "hybrid_adaptive"
        adaptive_result.reason = f"Hybrid mode (adaptive phase) - {adaptive_result.reason}"
        return adaptive_result
    # Fallback: no calibrated items left, revert to fixed order.
    fallback = await get_next_item_fixed(
        db, session_id, tryout_id, website_id, level_filter
    )
    fallback.selection_method = "hybrid_fallback"
    fallback.reason = f"Hybrid mode (CTT fallback) - {fallback.reason}"
    return fallback
async def update_theta(
    db: AsyncSession,
    session_id: str,
    item_id: int,
    is_correct: bool
) -> tuple[float, float]:
    """
    Update session theta estimate based on response.

    Calls estimate_theta from irt_calibration.py.
    Updates session.theta and session.theta_se.
    Handles initial theta (uses 0.0 for first item).
    Clamps theta to [-3, +3].

    Args:
        db: Database session
        session_id: Session identifier
        item_id: Item that was answered
        is_correct: Whether the answer was correct

    Returns:
        Tuple of (theta, theta_se)
    """
    # Thin wrapper: all estimation logic lives in
    # irt_calibration.update_theta_after_response; kept here so CAT callers
    # have a single service-level entry point.
    return await update_theta_after_response(db, session_id, item_id, is_correct)
async def should_terminate(
    db: AsyncSession,
    session_id: str,
    max_items: Optional[int] = None,
    se_threshold: float = DEFAULT_SE_THRESHOLD
) -> TerminationCheck:
    """
    Decide whether an adaptive session should stop serving items.

    Termination fires when either:
    - the session has reached ``max_items`` answered items, or
    - theta's standard error has dropped below ``se_threshold`` AND at
      least 15 items have been answered (minimum per PRD).

    Args:
        db: Database session
        session_id: Session identifier
        max_items: Maximum items allowed (None = no limit)
        se_threshold: SE threshold for termination

    Returns:
        TerminationCheck with termination status and reason

    Raises:
        CATSelectionError: If the session does not exist.
    """
    session = (
        await db.execute(select(Session).where(Session.session_id == session_id))
    ).scalar_one_or_none()
    if session is None:
        raise CATSelectionError(f"Session {session_id} not found")
    items_answered = (
        await db.execute(
            select(func.count(UserAnswer.id)).where(
                UserAnswer.session_id == session_id
            )
        )
    ).scalar() or 0
    max_items_reached = max_items is not None and items_answered >= max_items
    # SE-based stopping requires a minimum of 15 responses (per PRD) so an
    # early lucky streak cannot end the test prematurely.
    min_items_for_se = 15
    current_se = session.theta_se
    se_threshold_met = (
        current_se is not None
        and current_se < se_threshold
        and items_answered >= min_items_for_se
    )
    reasons = []
    if max_items_reached:
        reasons.append(f"max items reached ({items_answered}/{max_items})")
    if se_threshold_met:
        reasons.append(f"SE threshold met ({current_se:.3f} < {se_threshold})")
    if not reasons:
        reasons.append("continuing")
    return TerminationCheck(
        should_terminate=max_items_reached or se_threshold_met,
        reason="; ".join(reasons),
        items_answered=items_answered,
        current_se=current_se,
        max_items=max_items,
        se_threshold_met=se_threshold_met
    )
async def get_next_item(
    db: AsyncSession,
    session_id: str,
    selection_mode: Literal["fixed", "adaptive", "hybrid"] = "fixed",
    hybrid_transition_slot: int = 10,
    ai_generation_enabled: bool = False,
    level_filter: Optional[str] = None
) -> NextItemResult:
    """
    Dispatch item selection to the strategy for ``selection_mode``.

    Main entry point for item selection.

    Args:
        db: Database session
        session_id: Session identifier
        selection_mode: Selection mode ('fixed', 'adaptive', 'hybrid')
        hybrid_transition_slot: Slot to transition in hybrid mode
        ai_generation_enabled: Whether AI generation is enabled
        level_filter: Optional difficulty level filter

    Returns:
        NextItemResult with selected item

    Raises:
        CATSelectionError: If the session is missing or the mode is unknown.
    """
    # The session carries the tryout/website the strategies need.
    session = (
        await db.execute(select(Session).where(Session.session_id == session_id))
    ).scalar_one_or_none()
    if session is None:
        raise CATSelectionError(f"Session {session_id} not found")
    tryout_id = session.tryout_id
    website_id = session.website_id
    if selection_mode == "fixed":
        return await get_next_item_fixed(
            db, session_id, tryout_id, website_id, level_filter
        )
    if selection_mode == "adaptive":
        return await get_next_item_adaptive(
            db, session_id, tryout_id, website_id, ai_generation_enabled, level_filter
        )
    if selection_mode == "hybrid":
        return await get_next_item_hybrid(
            db, session_id, tryout_id, website_id,
            hybrid_transition_slot, ai_generation_enabled, level_filter
        )
    raise CATSelectionError(f"Unknown selection mode: {selection_mode}")
async def check_user_level_reuse(
    db: AsyncSession,
    wp_user_id: str,
    website_id: int,
    tryout_id: str,
    slot: int,
    level: str
) -> bool:
    """
    Check if user has already answered a question at this difficulty level.

    Per PRD FR-5.3: Check if student user_id already answered question
    at specific difficulty level.

    Args:
        db: Database session
        wp_user_id: WordPress user ID
        website_id: Website identifier
        tryout_id: Tryout identifier
        slot: Question slot
        level: Difficulty level

    Returns:
        True if user has answered at this level, False otherwise
    """
    # Count answers joined to items at the given slot/level combination.
    stmt = (
        select(func.count(UserAnswer.id))
        .join(Item, UserAnswer.item_id == Item.id)
        .where(
            UserAnswer.wp_user_id == wp_user_id,
            UserAnswer.website_id == website_id,
            UserAnswer.tryout_id == tryout_id,
            Item.slot == slot,
            Item.level == level
        )
    )
    answered_count = (await db.execute(stmt)).scalar() or 0
    return answered_count > 0
async def get_available_levels_for_slot(
    db: AsyncSession,
    tryout_id: str,
    website_id: int,
    slot: int
) -> list[str]:
    """
    List the distinct difficulty levels available for a specific slot.

    Args:
        db: Database session
        tryout_id: Tryout identifier
        website_id: Website identifier
        slot: Question slot

    Returns:
        List of available levels
    """
    stmt = (
        select(Item.level)
        .where(
            Item.tryout_id == tryout_id,
            Item.website_id == website_id,
            Item.slot == slot
        )
        .distinct()
    )
    result = await db.execute(stmt)
    # scalars() yields the first (only) column of each row.
    return list(result.scalars().all())
# Admin playground functions for testing CAT behavior
async def simulate_cat_selection(
    db: AsyncSession,
    tryout_id: str,
    website_id: int,
    initial_theta: float = 0.0,
    selection_mode: Literal["fixed", "adaptive", "hybrid"] = "adaptive",
    max_items: int = 15,
    se_threshold: float = DEFAULT_SE_THRESHOLD,
    hybrid_transition_slot: int = 10
) -> dict:
    """
    Simulate CAT selection for admin testing.

    Runs the selection loop against randomly simulated responses and
    returns the sequence of selected items with b values and theta
    progression.

    Args:
        db: Database session
        tryout_id: Tryout identifier
        website_id: Website identifier
        initial_theta: Starting theta value
        selection_mode: Selection mode to use
        max_items: Maximum items to simulate
        se_threshold: SE threshold for termination
        hybrid_transition_slot: Slot to transition in hybrid mode

    Returns:
        Dict with simulation results (or an "error" key when the tryout
        has no items)
    """
    import random  # local import: only used for the simulated responses

    # Load all items for this tryout once, ordered by slot
    items_query = (
        select(Item)
        .where(
            Item.tryout_id == tryout_id,
            Item.website_id == website_id
        )
        .order_by(Item.slot)
    )
    items_result = await db.execute(items_query)
    all_items = list(items_result.scalars().all())
    if not all_items:
        return {
            "error": "No items found for this tryout",
            "tryout_id": tryout_id,
            "website_id": website_id
        }

    selected_items = []
    current_theta = initial_theta
    current_se = 3.0  # start with high uncertainty
    used_item_ids = set()
    # Running histories (kept incrementally; previously rebuilt from
    # selected_items on every iteration)
    responses: list[int] = []
    b_params: list[float] = []

    for i in range(max_items):
        available_items = [item for item in all_items if item.id not in used_item_ids]
        if not available_items:
            break

        # --- Select next item according to mode ---
        if selection_mode == "adaptive":
            # Prefer calibrated items; fall back to anything available
            calibrated_items = [
                item for item in available_items
                if item.calibrated and item.irt_b is not None
            ]
            if not calibrated_items:
                calibrated_items = available_items
            # Pick the item whose difficulty is closest to current theta
            best_item = min(
                calibrated_items,
                key=lambda item: abs((item.irt_b or 0) - current_theta)
            )
        elif selection_mode == "fixed":
            # Fixed mode: take items in slot order
            best_item = min(available_items, key=lambda item: item.slot)
        else:  # hybrid
            if i < hybrid_transition_slot:
                best_item = min(available_items, key=lambda item: item.slot)
            else:
                calibrated_items = [
                    item for item in available_items
                    if item.calibrated and item.irt_b is not None
                ]
                if calibrated_items:
                    best_item = min(
                        calibrated_items,
                        key=lambda item: abs((item.irt_b or 0) - current_theta)
                    )
                else:
                    best_item = min(available_items, key=lambda item: item.slot)
        used_item_ids.add(best_item.id)

        # --- Difficulty parameter for the chosen item ---
        # BUG FIX: the previous one-liner
        #   irt_b or estimate_b_from_ctt_p(ctt_p) if ctt_p else 0.0
        # parsed as (irt_b or estimate(...)) if ctt_p else 0.0, so a
        # calibrated irt_b was discarded whenever ctt_p was missing.
        if best_item.irt_b is not None:
            b = best_item.irt_b
        elif best_item.ctt_p is not None:
            b = estimate_b_from_ctt_p(best_item.ctt_p)
        else:
            b = 0.0

        # Simulate a response under the 1PL (Rasch) model
        p_correct = 1.0 / (1.0 + math.exp(-(current_theta - b)))
        is_correct = random.random() < p_correct

        # Update ability estimate from the full response history
        responses.append(1 if is_correct else 0)
        b_params.append(b)
        current_theta, current_se = estimate_theta_mle(responses, b_params, current_theta)

        selected_items.append({
            "slot": best_item.slot,
            "level": best_item.level,
            "b": b,
            "is_correct": is_correct,
            "theta_after": current_theta,
            "se_after": current_se,
            "calibrated": best_item.calibrated
        })

        # Terminate early once SE is low enough, but never before 15 items
        if current_se < se_threshold and i >= 14:
            break

    return {
        "tryout_id": tryout_id,
        "website_id": website_id,
        "initial_theta": initial_theta,
        "selection_mode": selection_mode,
        "total_items": len(selected_items),
        "final_theta": current_theta,
        "final_se": current_se,
        "se_threshold_met": current_se < se_threshold,
        "items": selected_items
    }

View File

@@ -0,0 +1,431 @@
"""
Configuration Management Service.
Provides functions to retrieve and update tryout configurations.
Handles configuration changes for scoring, selection, and normalization modes.
"""
import logging
from typing import Any, Dict, Literal, Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.tryout import Tryout
from app.models.tryout_stats import TryoutStats
logger = logging.getLogger(__name__)
async def get_config(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Tryout:
    """
    Fetch the configuration row for a single tryout.

    The returned Tryout model carries all configuration fields
    (scoring_mode, selection_mode, normalization_mode, etc.).

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Tryout model with all configuration fields

    Raises:
        ValueError: If tryout not found
    """
    query = select(Tryout).where(
        Tryout.website_id == website_id,
        Tryout.tryout_id == tryout_id,
    )
    tryout = (await db.execute(query)).scalar_one_or_none()
    if tryout is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )
    return tryout
async def update_config(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    config_updates: Dict[str, Any],
) -> Tryout:
    """
    Update tryout configuration with provided fields.

    Accepts a dictionary of configuration updates and applies them to the
    tryout configuration. Only provided fields are updated; unknown field
    names are skipped with a warning (they do NOT raise).

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        config_updates: Dictionary of configuration fields to update

    Returns:
        Updated Tryout model (unchanged when no valid fields were given)

    Raises:
        ValueError: If tryout not found
    """
    # Fetch tryout
    result = await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    tryout = result.scalar_one_or_none()
    if tryout is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )

    # Whitelist of mutable configuration fields
    valid_fields = {
        "name", "description",
        "scoring_mode", "selection_mode", "normalization_mode",
        "min_sample_for_dynamic", "static_rataan", "static_sb",
        "ai_generation_enabled",
        "hybrid_transition_slot",
        "min_calibration_sample", "theta_estimation_method", "fallback_to_ctt_on_error",
    }

    # Apply only whitelisted fields; warn (don't fail) on unknown names
    updated_fields = []
    for field, value in config_updates.items():
        if field not in valid_fields:
            logger.warning(f"Skipping invalid config field: {field}")
            continue
        setattr(tryout, field, value)
        updated_fields.append(field)

    # FIX: previously the "Updated config" line was logged (with an empty
    # field list) and a flush issued even when nothing changed.
    if not updated_fields:
        logger.warning(f"No valid config fields to update for tryout {tryout_id}")
        return tryout

    await db.flush()
    logger.info(
        f"Updated config for tryout {tryout_id}, website {website_id}: "
        f"{', '.join(updated_fields)}"
    )
    return tryout
async def toggle_normalization_mode(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    new_mode: Literal["static", "dynamic", "hybrid"],
) -> Tryout:
    """
    Switch a tryout's normalization mode.

    Validates the requested mode, applies it, and logs readiness
    information based on the current participant count versus the
    dynamic-normalization sample threshold.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        new_mode: New normalization mode ("static", "dynamic", "hybrid")

    Returns:
        Updated Tryout model

    Raises:
        ValueError: If tryout not found or invalid mode provided
    """
    if new_mode not in ("static", "dynamic", "hybrid"):
        raise ValueError(
            f"Invalid normalization_mode: {new_mode}. "
            "Must be 'static', 'dynamic', or 'hybrid'"
        )

    # Locate the tryout row
    tryout = (await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()
    if tryout is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )

    previous_mode = tryout.normalization_mode
    tryout.normalization_mode = new_mode

    # Participant count drives the readiness warnings below
    stats = (await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()
    participant_count = stats.participant_count if stats else 0
    min_sample = tryout.min_sample_for_dynamic
    threshold_met = participant_count >= min_sample

    # Emit mode-specific guidance
    if new_mode == "dynamic" and not threshold_met:
        logger.warning(
            f"Switching to dynamic normalization with only {participant_count} "
            f"participants (threshold: {min_sample}). "
            "Dynamic normalization may produce unreliable results."
        )
    elif new_mode == "dynamic":
        logger.info(
            f"Switching to dynamic normalization with {participant_count} "
            f"participants (threshold: {min_sample}). "
            "Ready for dynamic normalization."
        )
    elif new_mode == "hybrid" and threshold_met:
        logger.info(
            f"Switching to hybrid normalization with {participant_count} "
            f"participants (threshold: {min_sample}). "
            "Will use dynamic normalization immediately."
        )
    elif new_mode == "hybrid":
        logger.info(
            f"Switching to hybrid normalization with {participant_count} "
            f"participants (threshold: {min_sample}). "
            f"Will use static normalization until {min_sample} participants reached."
        )

    await db.flush()
    logger.info(
        f"Toggled normalization mode for tryout {tryout_id}, "
        f"website {website_id}: {previous_mode} -> {new_mode}"
    )
    return tryout
async def get_normalization_config(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Dict[str, Any]:
    """
    Summarize the normalization configuration of a tryout.

    Combines the tryout's configured mode with live participant
    statistics to report the effective mode and threshold status.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Dictionary with normalization configuration summary

    Raises:
        ValueError: If tryout not found
    """
    tryout = await get_config(db, website_id, tryout_id)

    stats = (await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()

    # Threshold status
    participant_count = stats.participant_count if stats else 0
    min_sample = tryout.min_sample_for_dynamic
    threshold_ready = participant_count >= min_sample
    participants_needed = max(0, min_sample - participant_count)

    # "hybrid" resolves to dynamic once the sample threshold is met
    current_mode = tryout.normalization_mode
    effective_mode = current_mode
    if current_mode == "hybrid":
        effective_mode = "dynamic" if threshold_ready else "static"

    summary: Dict[str, Any] = {
        "tryout_id": tryout_id,
        "normalization_mode": current_mode,
        "effective_mode": effective_mode,
        "static_rataan": tryout.static_rataan,
        "static_sb": tryout.static_sb,
        "dynamic_rataan": stats.rataan if stats else None,
        "dynamic_sb": stats.sb if stats else None,
        "participant_count": participant_count,
        "min_sample_for_dynamic": min_sample,
        "threshold_ready": threshold_ready,
        "participants_needed": participants_needed,
    }
    return summary
async def reset_normalization_stats(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> TryoutStats:
    """
    Reset TryoutStats to initial values.

    Zeroes participant_count and the running sums, clears derived
    statistics, and switches the tryout's normalization_mode to
    "static" as a safe interim setting.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Reset TryoutStats record

    Raises:
        ValueError: If tryout not found
    """
    # The tryout must exist before its stats can be reset
    tryout = (await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()
    if tryout is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )

    # Fall back to static normalization until stats accumulate again
    tryout.normalization_mode = "static"

    stats = (await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()

    if stats is None:
        # No record yet — create an empty one
        stats = TryoutStats(
            website_id=website_id,
            tryout_id=tryout_id,
            participant_count=0,
            total_nm_sum=0.0,
            total_nm_sq_sum=0.0,
            rataan=None,
            sb=None,
            min_nm=None,
            max_nm=None,
        )
        db.add(stats)
    else:
        # Zero out the existing record in place
        stats.participant_count = 0
        stats.total_nm_sum = 0.0
        stats.total_nm_sq_sum = 0.0
        stats.rataan = None
        stats.sb = None
        stats.min_nm = None
        stats.max_nm = None

    await db.flush()
    logger.info(
        f"Reset normalization stats for tryout {tryout_id}, "
        f"website {website_id}. Normalization mode switched to static."
    )
    return stats
async def get_full_config(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Dict[str, Any]:
    """
    Get the complete tryout configuration plus current statistics.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Dictionary with full configuration and stats

    Raises:
        ValueError: If tryout not found
    """
    tryout = await get_config(db, website_id, tryout_id)

    stats = (await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()

    # Stats sub-dict defaults to empty values when no record exists yet
    stats_section = {
        "participant_count": stats.participant_count if stats else 0,
        "rataan": stats.rataan if stats else None,
        "sb": stats.sb if stats else None,
        "min_nm": stats.min_nm if stats else None,
        "max_nm": stats.max_nm if stats else None,
        "last_calculated": stats.last_calculated if stats else None,
    }

    return {
        "tryout_id": tryout.tryout_id,
        "name": tryout.name,
        "description": tryout.description,
        "scoring_mode": tryout.scoring_mode,
        "selection_mode": tryout.selection_mode,
        "normalization_mode": tryout.normalization_mode,
        "min_sample_for_dynamic": tryout.min_sample_for_dynamic,
        "static_rataan": tryout.static_rataan,
        "static_sb": tryout.static_sb,
        "ai_generation_enabled": tryout.ai_generation_enabled,
        "hybrid_transition_slot": tryout.hybrid_transition_slot,
        "min_calibration_sample": tryout.min_calibration_sample,
        "theta_estimation_method": tryout.theta_estimation_method,
        "fallback_to_ctt_on_error": tryout.fallback_to_ctt_on_error,
        "stats": stats_section,
        "created_at": tryout.created_at,
        "updated_at": tryout.updated_at,
    }

385
app/services/ctt_scoring.py Normal file
View File

@@ -0,0 +1,385 @@
"""
CTT (Classical Test Theory) Scoring Engine.
Implements exact Excel formulas for:
- p-value (Tingkat Kesukaran): p = Σ Benar / Total Peserta
- Bobot (Weight): Bobot = 1 - p
- NM (Nilai Mentah): NM = (Total_Bobot_Siswa / Total_Bobot_Max) × 1000
- NN (Nilai Nasional): NN = 500 + 100 × ((NM - Rataan) / SB)
All formulas match PRD Section 13.1 exactly.
"""
import math
from datetime import datetime, timezone
from typing import Optional
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.item import Item
from app.models.tryout_stats import TryoutStats
from app.models.user_answer import UserAnswer
def calculate_ctt_p(total_correct: int, total_participants: int) -> float:
    """
    Compute the CTT p-value (Tingkat Kesukaran / difficulty index).

    Formula: p = Σ Benar / Total Peserta

    Args:
        total_correct: Number of correct answers (Σ Benar)
        total_participants: Total number of participants (Total Peserta)

    Returns:
        p-value in range [0.0, 1.0]

    Raises:
        ValueError: If total_participants is 0 or values are invalid
    """
    # Reject impossible inputs up front
    if total_participants <= 0:
        raise ValueError("total_participants must be greater than 0")
    if total_correct < 0:
        raise ValueError("total_correct cannot be negative")
    if total_correct > total_participants:
        raise ValueError("total_correct cannot exceed total_participants")
    ratio = total_correct / total_participants
    # Guard against floating-point drift outside [0, 1]
    return min(max(ratio, 0.0), 1.0)
def calculate_ctt_bobot(p_value: float) -> float:
    """
    Compute the CTT bobot (weight) from a p-value.

    Formula: Bobot = 1 - p

    Easy questions (p > 0.70) get low weight, difficult questions
    (p < 0.30) get high weight, medium questions fall in between.

    Args:
        p_value: CTT p-value in range [0.0, 1.0]

    Returns:
        bobot (weight) in range [0.0, 1.0]

    Raises:
        ValueError: If p_value is outside [0, 1] range
    """
    if p_value < 0.0 or p_value > 1.0:
        raise ValueError(f"p_value must be in range [0, 1], got {p_value}")
    # Clamp guards against floating-point drift
    return min(max(1.0 - p_value, 0.0), 1.0)
def calculate_ctt_nm(total_bobot_siswa: float, total_bobot_max: float) -> int:
    """
    Compute the CTT NM (Nilai Mentah / raw score).

    Formula: NM = (Total_Bobot_Siswa / Total_Bobot_Max) × 1000

    Matches the Excel SUMPRODUCT calculation where Total_Bobot_Siswa
    is the sum of bobot earned on correct answers and Total_Bobot_Max
    is the sum of bobot over all questions.

    Args:
        total_bobot_siswa: Total weight earned by student
        total_bobot_max: Maximum possible weight (sum of all item bobots)

    Returns:
        NM (raw score) as an integer in range [0, 1000]

    Raises:
        ValueError: If total_bobot_max is 0 or values are invalid
    """
    if total_bobot_max <= 0:
        raise ValueError("total_bobot_max must be greater than 0")
    if total_bobot_siswa < 0:
        raise ValueError("total_bobot_siswa cannot be negative")
    raw_score = (total_bobot_siswa / total_bobot_max) * 1000
    # Round to nearest integer, then clamp into the legal score range
    return min(1000, max(0, round(raw_score)))
def calculate_ctt_nn(nm: int, rataan: float, sb: float) -> int:
    """
    Compute the CTT NN (Nilai Nasional / normalized score).

    Formula: NN = 500 + 100 × ((NM - Rataan) / SB)

    Normalizes scores onto a mean=500, SD=100 scale.

    Args:
        nm: Nilai Mentah (raw score) in range [0, 1000]
        rataan: Mean of NM scores
        sb: Standard deviation of NM scores (Simpangan Baku)

    Returns:
        NN (normalized score) as an integer in range [0, 1000]

    Raises:
        ValueError: If nm is out of range
    """
    if not 0 <= nm <= 1000:
        raise ValueError(f"nm must be in range [0, 1000], got {nm}")
    # Degenerate distribution (all scores identical): return the mean score
    if sb <= 0:
        return 500
    normalized = 500 + 100 * ((nm - rataan) / sb)
    # Round to nearest integer, then clamp into the legal score range
    return min(1000, max(0, round(normalized)))
def categorize_difficulty(p_value: float) -> str:
    """
    Categorize question difficulty from its CTT p-value.

    Categories per CTT standards (PRD Section 13.2):
    - p < 0.30          → "sulit"  (difficult)
    - 0.30 ≤ p ≤ 0.70   → "sedang" (medium)
    - p > 0.70          → "mudah"  (easy)

    Args:
        p_value: CTT p-value in range [0.0, 1.0]

    Returns:
        Difficulty category: "mudah", "sedang", or "sulit"
    """
    if p_value < 0.30:
        return "sulit"
    if p_value <= 0.70:
        return "sedang"
    return "mudah"
async def calculate_ctt_p_for_item(
    db: AsyncSession, item_id: int
) -> Optional[float]:
    """
    Calculate CTT p-value for a specific item from existing responses.

    Queries UserAnswer records for the item to compute:
        p = Σ Benar / Total Peserta

    Uses two plain COUNT queries. (The previous single-query version
    used ``func.cast(UserAnswer.is_correct, type_=func.INTEGER)``, which
    is not a valid SQLAlchemy cast — ``func.INTEGER`` produces a generic
    SQL function, not a type — and failed at runtime.)

    Args:
        db: Async database session
        item_id: Item ID to calculate p-value for

    Returns:
        p-value in range [0.0, 1.0], or None if no responses exist
    """
    # Total number of recorded answers for this item
    total_result = await db.execute(
        select(func.count()).where(UserAnswer.item_id == item_id)
    )
    total = total_result.scalar() or 0
    if total == 0:
        return None

    # Number of correct answers for this item
    correct_result = await db.execute(
        select(func.count()).where(
            UserAnswer.item_id == item_id,
            UserAnswer.is_correct.is_(True),
        )
    )
    correct = correct_result.scalar() or 0
    return calculate_ctt_p(correct, total)
async def update_tryout_stats(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    nm: int,
) -> TryoutStats:
    """
    Fold one new NM score into the running TryoutStats.

    Updates:
    - participant_count += 1
    - total_nm_sum += nm
    - total_nm_sq_sum += nm²
    - min_nm / max_nm when the new score extends the range

    Mean (rataan) and population standard deviation (sb) are then
    recomputed from the running sums: SD = sqrt(Σx²/n - mean²),
    clamped at 0 to absorb floating-point error.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        nm: New NM score to add

    Returns:
        Updated TryoutStats record
    """
    stats = (await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )).scalar_one_or_none()

    if stats is None:
        # First participant: seed a fresh record from this single score
        stats = TryoutStats(
            website_id=website_id,
            tryout_id=tryout_id,
            participant_count=1,
            total_nm_sum=float(nm),
            total_nm_sq_sum=float(nm * nm),
            rataan=float(nm),
            sb=0.0,  # SD of a single data point is 0
            min_nm=nm,
            max_nm=nm,
            last_calculated=datetime.now(timezone.utc),
        )
        db.add(stats)
    else:
        # Accumulate running sums and extend the observed range
        stats.participant_count += 1
        stats.total_nm_sum += nm
        stats.total_nm_sq_sum += nm * nm
        stats.min_nm = nm if stats.min_nm is None else min(stats.min_nm, nm)
        stats.max_nm = nm if stats.max_nm is None else max(stats.max_nm, nm)

    # Recompute derived statistics from the running sums
    count = stats.participant_count
    stats.rataan = stats.total_nm_sum / count
    if count > 1:
        # Population variance; clamp handles floating-point error
        variance = max(0.0, (stats.total_nm_sq_sum / count) - stats.rataan ** 2)
        stats.sb = math.sqrt(variance)
    else:
        stats.sb = 0.0
    stats.last_calculated = datetime.now(timezone.utc)

    await db.flush()
    return stats
async def get_total_bobot_max(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    level: str = "sedang",
) -> float:
    """
    Sum the maximum attainable bobot for a tryout at one level.

    Total_Bobot_Max = Σ bobot over all questions in the tryout
    (filtered to the given difficulty level).

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        level: Difficulty level to filter by (default: "sedang")

    Returns:
        Sum of all item bobots

    Raises:
        ValueError: If no items found or items have no bobot values
    """
    stmt = select(func.sum(Item.ctt_bobot)).where(
        Item.website_id == website_id,
        Item.tryout_id == tryout_id,
        Item.level == level,
    )
    bobot_sum = (await db.execute(stmt)).scalar()
    # None means no matching rows; 0 means bobot values were never set
    if not bobot_sum:
        raise ValueError(
            f"No items with bobot found for tryout {tryout_id}, level {level}"
        )
    return float(bobot_sum)
def convert_ctt_p_to_irt_b(p_value: float) -> float:
    """
    Convert a CTT p-value to an IRT difficulty parameter (b).

    Formula: b = ln((1 - p) / p)   (equivalently -ln(p / (1 - p)))

    Hard items (low p) map to positive b and easy items (high p) to
    negative b, matching the 1PL model used elsewhere in this codebase
    (P(correct) = sigmoid(theta - b)) and this function's own boundary
    clamps. (The previous implementation negated the log, inverting the
    difficulty scale relative to its clamps.)

    Maps p ∈ (0, 1) to b, clamped to [-3, +3].

    Args:
        p_value: CTT p-value in range (0.0, 1.0); boundary values are
            clamped rather than raising

    Returns:
        IRT b-parameter estimate in [-3.0, +3.0]
    """
    if p_value <= 0.0:
        return 3.0  # nobody answered correctly -> maximally difficult
    if p_value >= 1.0:
        return -3.0  # everybody answered correctly -> maximally easy

    b = math.log((1.0 - p_value) / p_value)
    # Clamp to the conventional IRT range [-3, +3]
    return max(-3.0, min(3.0, b))
def map_theta_to_nn(theta: float) -> int:
    """
    Map an IRT theta (ability) estimate onto the NN score scale.

    Formula: NN = 500 + (θ / 3) × 500

    Maps θ ∈ [-3, +3] linearly onto NN ∈ [0, 1000]; out-of-range
    theta values are clamped first.

    Args:
        theta: IRT ability estimate, nominally in [-3.0, +3.0]

    Returns:
        NN score as an integer in range [0, 1000]
    """
    bounded = min(3.0, max(-3.0, theta))
    raw = 500 + (bounded / 3) * 500
    return min(1000, max(0, round(raw)))

View File

@@ -0,0 +1,521 @@
"""
Excel Import/Export Service for Question Migration.
Handles import from standardized Excel format with:
- Row 2: KUNCI (answer key)
- Row 4: TK (tingkat kesukaran p-value)
- Row 5: BOBOT (weight 1-p)
- Rows 6+: Individual question data
Ensures 100% data integrity with comprehensive validation.
"""
import os
from datetime import datetime
from typing import Any, Dict, List, Optional
import openpyxl
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.item import Item
from app.services.ctt_scoring import (
convert_ctt_p_to_irt_b,
categorize_difficulty,
)
def validate_excel_structure(file_path: str) -> Dict[str, Any]:
    """
    Validate Excel file structure against the required import format.

    Checks:
    - File exists and is a .xlsx workbook
    - Sheet "CONTOH" exists
    - Required rows exist (Row 2 KUNCI, Row 4 TK, Row 5 BOBOT)
    - Question data rows (6+) and minimum column count are present
    - KUNCI row has values; TK/BOBOT rows contain numeric values

    Args:
        file_path: Path to Excel file

    Returns:
        Dict with:
        - valid: bool - Whether structure is valid
        - errors: List[str] - Validation errors if any
    """
    errors: List[str] = []

    # Check file exists
    if not os.path.exists(file_path):
        return {"valid": False, "errors": [f"File not found: {file_path}"]}
    # Check file extension
    if not file_path.lower().endswith('.xlsx'):
        return {"valid": False, "errors": ["File must be .xlsx format"]}

    try:
        wb = openpyxl.load_workbook(file_path, data_only=False)
    except Exception as e:
        return {"valid": False, "errors": [f"Failed to load Excel file: {str(e)}"]}

    # Check sheet "CONTOH" exists
    if "CONTOH" not in wb.sheetnames:
        return {
            "valid": False,
            "errors": ['Sheet "CONTOH" not found. Available sheets: ' + ", ".join(wb.sheetnames)]
        }
    ws = wb["CONTOH"]

    # Row-count checks: each missing structural row gets its own message
    if ws.max_row < 6:
        errors.append(f"Excel file must have at least 6 rows (found {ws.max_row})")
    if ws.max_row < 2:
        errors.append("Row 2 (KUNCI - answer key) is required")
    if ws.max_row < 4:
        errors.append("Row 4 (TK - p-values) is required")
    if ws.max_row < 5:
        errors.append("Row 5 (BOBOT - weights) is required")
    if ws.max_row < 6:
        errors.append("Question data rows (6+) are required")

    # Check minimum columns (slot, level, soal_text, 4 options, correct_answer)
    if ws.max_column < 8:
        errors.append(
            f"Excel file must have at least 8 columns (found {ws.max_column}). "
            "Expected: slot, level, soal_text, options_A, options_B, options_C, options_D, correct_answer"
        )

    # Check KUNCI row has at least one real answer-key value
    if ws.max_row >= 2:
        kunci_row_values = [ws.cell(2, col).value for col in range(4, ws.max_column + 1)]
        if not any(v for v in kunci_row_values if v and v != "KUNCI"):
            errors.append("Row 2 (KUNCI) must contain answer key values")

    # Numeric checks on rows 4 and 5 need computed (data_only) values.
    # PERF FIX: the workbook was previously reloaded once per row check;
    # load it a single time here.
    if ws.max_row >= 4:
        ws_data = openpyxl.load_workbook(file_path, data_only=True)["CONTOH"]

        # Check TK row has numeric p-values
        tk_row_values = [ws_data.cell(4, col).value for col in range(4, ws.max_column + 1)]
        if not any(v for v in tk_row_values if isinstance(v, (int, float))):
            errors.append("Row 4 (TK) must contain numeric p-values")

        # Check BOBOT row has numeric weight values
        if ws.max_row >= 5:
            bobot_row_values = [ws_data.cell(5, col).value for col in range(4, ws.max_column + 1)]
            if not any(v for v in bobot_row_values if isinstance(v, (int, float))):
                errors.append("Row 5 (BOBOT) must contain numeric weight values")

    return {"valid": len(errors) == 0, "errors": errors}
def parse_excel_import(
    file_path: str,
    website_id: int,
    tryout_id: str
) -> Dict[str, Any]:
    """
    Parse Excel file and extract items with full validation.

    Excel structure:
    - Sheet name: "CONTOH"
    - Row 2: KUNCI (answer key) - extract correct answers per slot
    - Row 4: TK (tingkat kesukaran p-value) - extract p-values per slot
    - Row 5: BOBOT (weight 1-p) - extract bobot per slot
    - Rows 6+: Individual question data

    Rows with problems are skipped and reported in validation_errors;
    parsing continues with the remaining rows.

    Args:
        file_path: Path to Excel file
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Dict with:
        - items: List[Dict] - Parsed items ready for database
        - validation_errors: List[str] - Any validation errors
        - items_count: int - Number of items parsed
    """
    # First validate structure; bail out before any parsing on failure
    validation = validate_excel_structure(file_path)
    if not validation["valid"]:
        return {
            "items": [],
            "validation_errors": validation["errors"],
            "items_count": 0
        }
    items: List[Dict[str, Any]] = []
    errors: List[str] = []
    try:
        # Load workbook twice: once with formulas, once with data_only
        # (data_only exposes the computed values of formula cells)
        wb = openpyxl.load_workbook(file_path, data_only=False)
        ws = wb["CONTOH"]
        wb_data = openpyxl.load_workbook(file_path, data_only=True)
        ws_data = wb_data["CONTOH"]
        # Extract answer key from Row 2 (skip the "KUNCI" label cell)
        answer_key: Dict[int, str] = {}
        for col in range(4, ws.max_column + 1):
            key_cell = ws.cell(2, col).value
            if key_cell and key_cell != "KUNCI":
                slot_num = col - 3  # Column 4 -> slot 1
                answer_key[slot_num] = str(key_cell).strip().upper()
        # Extract p-values from Row 4 (only for slots present in the key)
        p_values: Dict[int, float] = {}
        for col in range(4, ws.max_column + 1):
            slot_num = col - 3
            if slot_num in answer_key:
                p_cell = ws_data.cell(4, col).value
                if p_cell and isinstance(p_cell, (int, float)):
                    p_values[slot_num] = float(p_cell)
        # Extract bobot from Row 5 (only for slots present in the key)
        bobot_values: Dict[int, float] = {}
        for col in range(4, ws.max_column + 1):
            slot_num = col - 3
            if slot_num in answer_key:
                bobot_cell = ws_data.cell(5, col).value
                if bobot_cell and isinstance(bobot_cell, (int, float)):
                    bobot_values[slot_num] = float(bobot_cell)
        # Parse question data rows (6+)
        for row_idx in range(6, ws.max_row + 1):
            # Column mapping (based on project-brief):
            # Column 1 (A): slot (question number)
            # Column 2 (B): level (mudah/sedang/sulit)
            # Column 3 (C): soal_text (question stem)
            # Column 4 (D): options_A
            # Column 5 (E): options_B
            # Column 6 (F): options_C
            # Column 7 (G): options_D
            # Column 8 (H): correct_answer
            slot_cell = ws.cell(row_idx, 1).value
            level_cell = ws.cell(row_idx, 2).value
            soal_text_cell = ws.cell(row_idx, 3).value
            option_a = ws.cell(row_idx, 4).value
            option_b = ws.cell(row_idx, 5).value
            option_c = ws.cell(row_idx, 6).value
            option_d = ws.cell(row_idx, 7).value
            correct_cell = ws.cell(row_idx, 8).value
            # Skip fully empty rows silently (trailing blank rows are common)
            if not slot_cell and not soal_text_cell:
                continue
            # A row with content but no slot is an error
            if not slot_cell:
                errors.append(f"Row {row_idx}: Missing slot value")
                continue
            # Coerce slot to int: numeric cells directly, text cells via strip
            slot_num = int(slot_cell) if isinstance(slot_cell, (int, float)) else None
            if slot_num is None:
                try:
                    slot_num = int(str(slot_cell).strip())
                except (ValueError, AttributeError):
                    errors.append(f"Row {row_idx}: Invalid slot value: {slot_cell}")
                    continue
            # Get or infer level
            if not level_cell:
                # No explicit level: infer it from the slot's p-value
                # (defaulting to 0.5 -> "sedang" when no p-value exists)
                p_val = p_values.get(slot_num, 0.5)
                level_val = categorize_difficulty(p_val)
            else:
                level_val = str(level_cell).strip().lower()
                if level_val not in ["mudah", "sedang", "sulit"]:
                    errors.append(
                        f"Row {row_idx}: Invalid level '{level_cell}'. Must be 'mudah', 'sedang', or 'sulit'"
                    )
                    continue
            # Validate soal_text (question stem is mandatory)
            if not soal_text_cell:
                errors.append(f"Row {row_idx} (slot {slot_num}): Missing soal_text (question stem)")
                continue
            # Build options JSON from the four option columns
            options: Dict[str, str] = {}
            if option_a:
                options["A"] = str(option_a).strip()
            if option_b:
                options["B"] = str(option_b).strip()
            if option_c:
                options["C"] = str(option_c).strip()
            if option_d:
                options["D"] = str(option_d).strip()
            if len(options) < 4:
                errors.append(
                    f"Row {row_idx} (slot {slot_num}): Missing options. Expected 4 options (A, B, C, D)"
                )
                continue
            # Get correct answer: per-row cell wins, Row 2 key is the fallback
            if not correct_cell:
                correct_ans = answer_key.get(slot_num)
                if not correct_ans:
                    errors.append(
                        f"Row {row_idx} (slot {slot_num}): Missing correct_answer and no answer key found"
                    )
                    continue
            else:
                correct_ans = str(correct_cell).strip().upper()
                if correct_ans not in ["A", "B", "C", "D"]:
                    errors.append(
                        f"Row {row_idx} (slot {slot_num}): Invalid correct_answer '{correct_ans}'. Must be A, B, C, or D"
                    )
                    continue
            # Get CTT parameters (bobot defaults to 1-p when Row 5 is missing)
            p_val = p_values.get(slot_num, 0.5)
            bobot_val = bobot_values.get(slot_num, 1.0 - p_val)
            # Validate p-value range
            if p_val < 0 or p_val > 1:
                errors.append(
                    f"Slot {slot_num}: Invalid p-value {p_val}. Must be in range [0, 1]"
                )
                continue
            # Validate bobot range
            if bobot_val < 0 or bobot_val > 1:
                errors.append(
                    f"Slot {slot_num}: Invalid bobot {bobot_val}. Must be in range [0, 1]"
                )
                continue
            # Derive CTT category and an initial IRT b estimate from p
            ctt_cat = categorize_difficulty(p_val)
            irt_b = convert_ctt_p_to_irt_b(p_val)
            # Build item dict matching the Item model columns;
            # imported items start uncalibrated and manually sourced
            item = {
                "tryout_id": tryout_id,
                "website_id": website_id,
                "slot": slot_num,
                "level": level_val,
                "stem": str(soal_text_cell).strip(),
                "options": options,
                "correct_answer": correct_ans,
                "explanation": None,
                "ctt_p": p_val,
                "ctt_bobot": bobot_val,
                "ctt_category": ctt_cat,
                "irt_b": irt_b,
                "irt_se": None,
                "calibrated": False,
                "calibration_sample_size": 0,
                "generated_by": "manual",
                "ai_model": None,
                "basis_item_id": None,
            }
            items.append(item)
        return {
            "items": items,
            "validation_errors": errors,
            "items_count": len(items)
        }
    except Exception as e:
        # Any unexpected failure is reported as a single parsing error
        return {
            "items": [],
            "validation_errors": [f"Parsing error: {str(e)}"],
            "items_count": 0
        }
async def bulk_insert_items(
    items_list: List[Dict[str, Any]],
    db: AsyncSession
) -> Dict[str, Any]:
    """
    Bulk insert items with duplicate detection.

    Skips duplicates based on the natural key (tryout_id, website_id, slot).
    Both rows already present in the database AND duplicate keys within
    ``items_list`` itself are skipped — previously two in-batch items with
    the same key were both added, because neither was visible in the
    database until commit.

    Args:
        items_list: List of item dictionaries to insert
        db: Async SQLAlchemy database session

    Returns:
        Dict with:
            - inserted_count: int - Number of items inserted
            - duplicate_count: int - Number of duplicates skipped
            - errors: List[str] - Any errors during insertion
    """
    inserted_count = 0
    duplicate_count = 0
    errors: List[str] = []
    # Natural keys already queued in this batch; catches in-batch duplicates
    # that a DB lookup alone would miss (they are not yet flushed/committed).
    seen_keys: set = set()
    try:
        for item_data in items_list:
            key = (
                item_data["tryout_id"],
                item_data["website_id"],
                item_data["slot"],
            )
            if key in seen_keys:
                duplicate_count += 1
                continue
            # Check for an existing row with the same natural key.
            result = await db.execute(
                select(Item).where(
                    Item.tryout_id == item_data["tryout_id"],
                    Item.website_id == item_data["website_id"],
                    Item.slot == item_data["slot"]
                )
            )
            existing = result.scalar_one_or_none()
            if existing:
                duplicate_count += 1
                continue
            # Queue the new item.
            db.add(Item(**item_data))
            seen_keys.add(key)
            inserted_count += 1
        # Commit all inserts in one transaction.
        await db.commit()
        return {
            "inserted_count": inserted_count,
            "duplicate_count": duplicate_count,
            "errors": errors
        }
    except Exception as e:
        # Roll back so a partial batch is never persisted; inserted_count is
        # reported as 0 because nothing survived the rollback.
        await db.rollback()
        return {
            "inserted_count": 0,
            "duplicate_count": duplicate_count,
            "errors": [f"Insertion failed: {str(e)}"]
        }
async def export_questions_to_excel(
    tryout_id: str,
    website_id: int,
    db: AsyncSession,
    output_path: Optional[str] = None
) -> str:
    """
    Export questions to Excel in standardized format.

    Creates an Excel workbook with a single "CONTOH" sheet laid out as:
        - Row 1: headers ("No", "Level", "Soal", then one "Soal N" per slot)
        - Row 2: KUNCI (answer key, one cell per slot at column slot + 3)
        - Row 3: empty spacer
        - Row 4: TK (p-values per slot)
        - Row 5: BOBOT (weights per slot)
        - Rows 6+: one row per question (slot, level, stem, options A-D, answer)

    NOTE(review): rows 6+ use fixed columns 4-8 for options/answer while rows
    2/4/5 use one column per slot — confirm the importer expects this mixed
    layout.

    Args:
        tryout_id: Tryout identifier
        website_id: Website identifier
        db: Async SQLAlchemy database session
        output_path: Optional output file path. If not provided, a file is
            created in the system temp directory (portable, unlike the
            previously hardcoded /tmp which breaks on Windows).

    Returns:
        Path to exported Excel file

    Raises:
        ValueError: If no items exist for the given tryout/website pair.
    """
    import os
    import tempfile
    # Fetch all items for this tryout, ordered by slot so rows come out sorted
    result = await db.execute(
        select(Item).filter(
            Item.tryout_id == tryout_id,
            Item.website_id == website_id
        ).order_by(Item.slot)
    )
    items = result.scalars().all()
    if not items:
        raise ValueError(f"No items found for tryout_id={tryout_id}, website_id={website_id}")
    # Create workbook
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "CONTOH"
    # Determine max slot for column sizing
    max_slot = max(item.slot for item in items)
    # Row 1: Header
    ws.cell(1, 1, "No")
    ws.cell(1, 2, "Level")
    ws.cell(1, 3, "Soal")
    for slot_idx in range(max_slot):
        col = slot_idx + 4
        ws.cell(1, col, f"Soal {slot_idx + 1}")
    # Row 2: KUNCI (answer key); slot N lives in column N + 3
    ws.cell(2, 1, "")
    ws.cell(2, 2, "")
    ws.cell(2, 3, "KUNCI")
    for item in items:
        col = item.slot + 3
        ws.cell(2, col, item.correct_answer)
    # Row 3: Empty spacer
    ws.cell(3, 1, "")
    ws.cell(3, 2, "")
    ws.cell(3, 3, "")
    # Row 4: TK (p-values); defaults to 0.5 when the item has no CTT p yet
    ws.cell(4, 1, "")
    ws.cell(4, 2, "")
    ws.cell(4, 3, "TK")
    for item in items:
        col = item.slot + 3
        ws.cell(4, col, item.ctt_p or 0.5)
    # Row 5: BOBOT (weights); default weight is 1 - p
    ws.cell(5, 1, "")
    ws.cell(5, 2, "")
    ws.cell(5, 3, "BOBOT")
    for item in items:
        col = item.slot + 3
        ws.cell(5, col, item.ctt_bobot or (1.0 - (item.ctt_p or 0.5)))
    # Rows 6+: Question data, one row per item
    row_idx = 6
    for item in items:
        # Column 1: Slot number
        ws.cell(row_idx, 1, item.slot)
        # Column 2: Level
        ws.cell(row_idx, 2, item.level)
        # Column 3: Soal text (stem)
        ws.cell(row_idx, 3, item.stem)
        # Columns 4-7: Options A-D
        options = item.options or {}
        ws.cell(row_idx, 4, options.get("A", ""))
        ws.cell(row_idx, 5, options.get("B", ""))
        ws.cell(row_idx, 6, options.get("C", ""))
        ws.cell(row_idx, 7, options.get("D", ""))
        # Column 8: Correct answer
        ws.cell(row_idx, 8, item.correct_answer)
        row_idx += 1
    # Generate output path if not provided; use the OS temp dir instead of a
    # hardcoded /tmp so export also works on Windows and honors TMPDIR.
    if output_path is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = os.path.join(
            tempfile.gettempdir(),
            f"tryout_{tryout_id}_export_{timestamp}.xlsx",
        )
    # Save workbook
    wb.save(output_path)
    return output_path

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,538 @@
"""
Dynamic Normalization Service.
Implements dynamic normalization with real-time calculation of rataan and SB
for each tryout. Supports multiple normalization modes:
- Static: Use hardcoded rataan/SB from config
- Dynamic: Calculate rataan/SB from participant NM scores in real-time
- Hybrid: Use static until threshold reached, then switch to dynamic
"""
import logging
import math
from datetime import datetime, timezone
from typing import Literal, Optional, Tuple
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.tryout import Tryout
from app.models.tryout_stats import TryoutStats
logger = logging.getLogger(__name__)
async def calculate_dynamic_stats(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Tuple[Optional[float], Optional[float]]:
    """
    Read the current dynamic normalization parameters for a tryout.

    Looks up the TryoutStats row keyed by (website_id, tryout_id) and
    returns its stored mean (rataan) and standard deviation (sb).

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        (rataan, sb) from the stats row, or (None, None) when no stats
        record exists yet for this tryout.
    """
    query = select(TryoutStats).where(
        TryoutStats.website_id == website_id,
        TryoutStats.tryout_id == tryout_id,
    )
    record = (await db.execute(query)).scalar_one_or_none()
    if record is None:
        return None, None
    return record.rataan, record.sb
async def update_dynamic_normalization(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    nm: int,
) -> Tuple[float, float]:
    """
    Update dynamic normalization with a new NM (Nilai Mentah / raw score).

    Fetches the current TryoutStats row for (website_id, tryout_id) — or
    creates one on first use — and incrementally updates it:
    - Increments participant_count by 1
    - Adds NM to total_nm_sum
    - Adds NM² to total_nm_sq_sum
    - Updates min_nm / max_nm
    - Recalculates rataan (mean) and sb (population standard deviation)

    Only flushes the session; committing appears to be left to the caller —
    TODO confirm callers commit the surrounding transaction.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        nm: Nilai Mentah (raw score) to add, must be in [0, 1000]

    Returns:
        Tuple of updated (rataan, sb)

    Raises:
        ValueError: If nm is out of valid range [0, 1000]
    """
    if not 0 <= nm <= 1000:
        raise ValueError(f"nm must be in range [0, 1000], got {nm}")
    result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    stats = result.scalar_one_or_none()
    if stats is None:
        # First participant for this tryout: seed a fresh stats record with
        # this single NM value.
        stats = TryoutStats(
            website_id=website_id,
            tryout_id=tryout_id,
            participant_count=1,
            total_nm_sum=float(nm),
            total_nm_sq_sum=float(nm * nm),
            rataan=float(nm),
            sb=0.0,  # SD is 0 for single data point
            min_nm=nm,
            max_nm=nm,
            last_calculated=datetime.now(timezone.utc),
        )
        db.add(stats)
    else:
        # Incrementally update the running sums — O(1) per participant, no
        # need to re-read every session's NM.
        stats.participant_count += 1
        stats.total_nm_sum += nm
        stats.total_nm_sq_sum += nm * nm
        # Update min/max
        if stats.min_nm is None or nm < stats.min_nm:
            stats.min_nm = nm
        if stats.max_nm is None or nm > stats.max_nm:
            stats.max_nm = nm
    # Recalculate mean and SD from the running sums (covers both branches).
    n = stats.participant_count
    sum_nm = stats.total_nm_sum
    sum_nm_sq = stats.total_nm_sq_sum
    # Mean = Σ NM / n
    mean = sum_nm / n
    stats.rataan = mean
    # Variance = (Σ NM² / n) - (mean)²
    # Using population standard deviation.
    # NOTE(review): the sum-of-squares form can lose float precision for very
    # large n; NM is bounded by [0, 1000], so this looks acceptable here.
    if n > 1:
        variance = (sum_nm_sq / n) - (mean ** 2)
        # Clamp variance to non-negative (handles floating point errors)
        variance = max(0.0, variance)
        stats.sb = math.sqrt(variance)
    else:
        stats.sb = 0.0
    stats.last_calculated = datetime.now(timezone.utc)
    await db.flush()
    logger.info(
        f"Updated dynamic normalization for tryout {tryout_id}, "
        f"website {website_id}: participant_count={stats.participant_count}, "
        f"rataan={stats.rataan:.2f}, sb={stats.sb:.2f}"
    )
    # rataan and sb are always set by this function
    assert stats.rataan is not None
    assert stats.sb is not None
    return stats.rataan, stats.sb
def apply_normalization(
    nm: int,
    rataan: float,
    sb: float,
) -> int:
    """
    Convert a raw score (NM) into the normalized national score (NN).

    NN = 500 + 100 × ((NM − rataan) / sb), i.e. a z-score rescaled onto a
    mean-500 / SD-100 distribution, then rounded and clamped to [0, 1000].

    Args:
        nm: Nilai Mentah (raw score) in range [0, 1000]
        rataan: Mean of NM scores
        sb: Standard deviation of NM scores

    Returns:
        NN (normalized score) in range [0, 1000]

    Raises:
        ValueError: If nm is outside [0, 1000]
    """
    if nm < 0 or nm > 1000:
        raise ValueError(f"nm must be in range [0, 1000], got {nm}")
    # Degenerate distribution (zero/negative SD): every participant scored
    # the same, so everyone lands exactly on the target mean.
    if sb <= 0:
        return 500
    scaled = 500 + 100 * ((nm - rataan) / sb)
    # Round to the nearest integer, then clamp into the valid score range.
    return min(1000, max(0, round(scaled)))
async def get_normalization_mode(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Literal["static", "dynamic", "hybrid"]:
    """
    Look up the configured normalization mode for one tryout.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        The tryout's normalization_mode: "static", "dynamic", or "hybrid"

    Raises:
        ValueError: If no such tryout exists for the website
    """
    query = select(Tryout).where(
        Tryout.website_id == website_id,
        Tryout.tryout_id == tryout_id,
    )
    tryout_row = (await db.execute(query)).scalar_one_or_none()
    if tryout_row is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )
    return tryout_row.normalization_mode
async def check_threshold_for_dynamic(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> bool:
    """
    Decide whether enough participants exist to trust dynamic normalization.

    Compares the tryout's current participant_count against its configured
    min_sample_for_dynamic threshold.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        True when participant_count >= min_sample_for_dynamic, else False
    """
    # Current participant count (0 when no stats row exists yet).
    stats_query = select(TryoutStats).where(
        TryoutStats.website_id == website_id,
        TryoutStats.tryout_id == tryout_id,
    )
    stats_row = (await db.execute(stats_query)).scalar_one_or_none()
    participant_count = 0 if stats_row is None else stats_row.participant_count
    # Configured threshold from the tryout config.
    threshold_query = select(Tryout.min_sample_for_dynamic).where(
        Tryout.website_id == website_id,
        Tryout.tryout_id == tryout_id,
    )
    threshold = (await db.execute(threshold_query)).scalar_one_or_none()
    if threshold is None:
        # Default to 100 if not configured
        threshold = 100
    return participant_count >= threshold
async def _fetch_static_params(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Tuple[float, float]:
    """
    Fetch the configured (static_rataan, static_sb) for one tryout.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Tuple of (static_rataan, static_sb)

    Raises:
        ValueError: If the tryout does not exist
    """
    result = await db.execute(
        select(Tryout.static_rataan, Tryout.static_sb).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    # BUG FIX: the original used result.scalar_one_or_none(), which on a
    # two-column select returns only the FIRST column (a float), so the
    # subsequent `rataan, sb = row` unpacking raised TypeError at runtime.
    # one_or_none() returns the full Row (or None) as intended.
    row = result.one_or_none()
    if row is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )
    rataan, sb = row
    return rataan, sb


async def get_normalization_params(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Tuple[float, float, Literal["static", "dynamic"]]:
    """
    Get normalization parameters (rataan, sb) based on current mode.

    Determines which normalization parameters to use:
    - Static mode: Use config.static_rataan and config.static_sb
    - Dynamic mode: Use calculated rataan and sb from TryoutStats
    - Hybrid mode: Use static until threshold reached, then dynamic
      (falling back to static when dynamic stats are not yet available)

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Tuple of (rataan, sb, mode_used)

    Raises:
        ValueError: If tryout not found or dynamic stats unavailable
    """
    mode = await get_normalization_mode(db, website_id, tryout_id)
    if mode == "static":
        # Use static values from config
        rataan, sb = await _fetch_static_params(db, website_id, tryout_id)
        return rataan, sb, "static"
    if mode == "dynamic":
        # Use dynamic values from stats
        rataan, sb = await calculate_dynamic_stats(db, website_id, tryout_id)
        if rataan is None or sb is None:
            raise ValueError(
                f"Dynamic normalization not available for tryout {tryout_id}. "
                "No stats have been calculated yet."
            )
        if sb == 0:
            logger.warning(
                f"Standard deviation is 0 for tryout {tryout_id}. "
                "All NM scores are identical."
            )
        return rataan, sb, "dynamic"
    # Hybrid: dynamic once enough participants exist, static until then.
    if await check_threshold_for_dynamic(db, website_id, tryout_id):
        rataan, sb = await calculate_dynamic_stats(db, website_id, tryout_id)
        if rataan is None or sb is None:
            # Fallback to static if dynamic stats are not available yet.
            rataan, sb = await _fetch_static_params(db, website_id, tryout_id)
            return rataan, sb, "static"
        return rataan, sb, "dynamic"
    rataan, sb = await _fetch_static_params(db, website_id, tryout_id)
    return rataan, sb, "static"
async def calculate_skewness(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Optional[float]:
    """
    Calculate skewness of the NM distribution for validation.

    Skewness measures the asymmetry of the probability distribution:
    - Skewness ≈ 0: Symmetric distribution
    - Skewness > 0: Right-skewed (tail to the right)
    - Skewness < 0: Left-skewed (tail to the left)

    NOTE: a real skewness estimate needs the individual NM values (or at
    least the running third moment Σ NM³), which TryoutStats does not store.
    Until that is tracked, this returns None ("unknown") except for the
    degenerate all-identical case. (Cleaned up: the previous version also
    computed unused n/mean locals before returning None.)

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        0.0 when all scores are identical, otherwise None (insufficient
        data, or skewness not computable from the stored aggregates)
    """
    result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    stats = result.scalar_one_or_none()
    if stats is None or stats.participant_count < 3:
        # Need at least 3 samples for skewness calculation
        return None
    if stats.sb == 0:
        return 0.0  # All values are identical -> perfectly symmetric
    # Individual NM values (or a third-moment running sum) are required for
    # skewness; report "unknown" rather than a wrong number.
    return None
async def validate_dynamic_normalization(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    target_mean: float = 500.0,
    target_sd: float = 100.0,
    mean_tolerance: float = 5.0,
    sd_tolerance: float = 5.0,
) -> Tuple[bool, dict]:
    """
    Validate that dynamic normalization produces the expected distribution.

    Checks whether the tryout's calculated rataan and sb fall within the
    given tolerances of the target mean/SD, and attaches human-readable
    warnings and suggestions for out-of-tolerance cases.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        target_mean: Target mean (default: 500)
        target_sd: Target standard deviation (default: 100)
        mean_tolerance: Allowed deviation from target mean (default: 5)
        sd_tolerance: Allowed deviation from target SD (default: 5)

    Returns:
        Tuple of (is_valid, validation_details) where validation_details
        contains:
            - participant_count: Number of participants
            - current_rataan: Current mean
            - current_sb: Current standard deviation
            - mean_deviation: Absolute deviation from target mean
            - sd_deviation: Absolute deviation from target SD
            - mean_within_tolerance: True if mean deviation <= mean_tolerance
            - sd_within_tolerance: True if SD deviation <= sd_tolerance
            - warnings: List of warning messages
            - suggestions: List of suggestions
    """
    # Get current stats
    result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    stats = result.scalar_one_or_none()
    if stats is None or stats.rataan is None or stats.sb is None:
        # No data yet: report invalid with an explanatory payload rather
        # than raising, so callers can surface this state in the UI.
        return False, {
            "participant_count": 0,
            "current_rataan": None,
            "current_sb": None,
            "mean_deviation": None,
            "sd_deviation": None,
            "mean_within_tolerance": False,
            "sd_within_tolerance": False,
            "warnings": ["No statistics available for validation"],
            "suggestions": ["Wait for more participants to complete sessions"],
        }
    # Calculate absolute deviations from the targets
    mean_deviation = abs(stats.rataan - target_mean)
    sd_deviation = abs(stats.sb - target_sd)
    # Tolerance checks are inclusive (<=)
    mean_within_tolerance = mean_deviation <= mean_tolerance
    sd_within_tolerance = sd_deviation <= sd_tolerance
    is_valid = mean_within_tolerance and sd_within_tolerance
    # Generate warnings
    warnings = []
    suggestions = []
    if not mean_within_tolerance:
        warnings.append(f"Mean deviation ({mean_deviation:.2f}) exceeds tolerance ({mean_tolerance})")
        if stats.rataan > target_mean:
            suggestions.append("Distribution may be right-skewed - consider checking question difficulty")
        else:
            suggestions.append("Distribution may be left-skewed - consider checking question difficulty")
    if not sd_within_tolerance:
        warnings.append(f"SD deviation ({sd_deviation:.2f}) exceeds tolerance ({sd_tolerance})")
        if stats.sb < target_sd:
            suggestions.append("SD too low - scores may be too tightly clustered")
        else:
            suggestions.append("SD too high - scores may have too much variance")
    # Check for skewness.
    # NOTE(review): calculate_skewness currently only ever returns None or
    # 0.0, so this warning cannot fire until skewness is actually computed.
    skewness = await calculate_skewness(db, website_id, tryout_id)
    if skewness is not None and abs(skewness) > 0.5:
        warnings.append(f"Distribution skewness ({skewness:.2f}) > 0.5 - distribution may be asymmetric")
        suggestions.append("Consider using static normalization if dynamic normalization is unstable")
    # Check participant count (advisory only; does not affect is_valid)
    if stats.participant_count < 100:
        suggestions.append(f"Participant count ({stats.participant_count}) below recommended minimum (100)")
    return is_valid, {
        "participant_count": stats.participant_count,
        "current_rataan": stats.rataan,
        "current_sb": stats.sb,
        "mean_deviation": mean_deviation,
        "sd_deviation": sd_deviation,
        "mean_within_tolerance": mean_within_tolerance,
        "sd_within_tolerance": sd_within_tolerance,
        "warnings": warnings,
        "suggestions": suggestions,
    }

1449
app/services/reporting.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,456 @@
"""
WordPress Authentication and User Synchronization Service.
Handles:
- JWT token validation via WordPress REST API
- User synchronization from WordPress to local database
- Multi-site support via website_id isolation
"""
import logging
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Optional
import httpx
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import get_settings
from app.models.user import User
from app.models.website import Website
logger = logging.getLogger(__name__)
settings = get_settings()
# Custom exceptions for WordPress integration
class WordPressAuthError(Exception):
    """Root of the WordPress-integration exception hierarchy."""
class WordPressTokenInvalidError(WordPressAuthError):
    """The supplied WordPress JWT token is invalid or has expired."""
class WordPressAPIError(WordPressAuthError):
    """The WordPress REST API is unreachable or returned an error."""
class WordPressRateLimitError(WordPressAuthError):
    """The WordPress API rate limit (HTTP 429) was exceeded."""
class WebsiteNotFoundError(WordPressAuthError):
    """The requested website_id does not exist in the local database."""
@dataclass
class WordPressUserInfo:
    """Data class for WordPress user information (from /wp/v2/users/me)."""
    # WordPress user ID, normalized to str by the caller.
    wp_user_id: str
    # WordPress login name; empty string when the API response omits it.
    username: str
    # Account email address; empty string when omitted by the API.
    email: str
    # Human-readable display name (the API's "name" field).
    display_name: str
    # WordPress role slugs assigned to the user.
    roles: list[str]
    # Full, unmodified JSON payload returned by the WordPress API.
    raw_data: dict[str, Any]
@dataclass
class SyncStats:
    """Data class for user synchronization statistics."""
    # Users newly created in the local database during the sync.
    inserted: int
    # Existing local users whose updated_at timestamp was refreshed.
    updated: int
    # inserted + updated (errored users are not counted here).
    total: int
    # Users skipped due to a missing ID or a per-user sync failure.
    errors: int
async def get_wordpress_api_base(website: Website) -> str:
    """
    Build the WordPress REST API base URL for a website.

    Args:
        website: Website model instance

    Returns:
        The site's `/wp-json` REST API root, derived from the website's
        configured site_url with any trailing slashes removed.
    """
    # Normalize the configured URL so we never emit a double slash.
    root = website.site_url
    while root.endswith('/'):
        root = root[:-1]
    return f"{root}/wp-json"
async def verify_wordpress_token(
    token: str,
    website_id: int,
    wp_user_id: str,
    db: AsyncSession,
) -> Optional[WordPressUserInfo]:
    """
    Verify a WordPress JWT token and validate the caller's identity.

    Calls WordPress REST API GET /wp/v2/users/me with an Authorization
    header, verifies the response's user ID matches wp_user_id, and
    verifies website_id exists in the local database first.

    NOTE(review): /users/me is called without context=edit here, so the
    response may omit username/email/roles depending on the WP setup —
    the .get(..., default) calls below tolerate that; confirm whether
    callers rely on those fields being populated.

    Args:
        token: WordPress JWT authentication token
        website_id: Website identifier for multi-site isolation
        wp_user_id: Expected WordPress user ID to verify
        db: Async database session

    Returns:
        WordPressUserInfo if valid; None on a user-ID mismatch

    Raises:
        WebsiteNotFoundError: If website_id doesn't exist
        WordPressTokenInvalidError: If token is invalid (HTTP 401)
        WordPressAPIError: If the API is unreachable or errors out
        WordPressRateLimitError: If rate limited (HTTP 429)
    """
    # Verify website exists before making any network call.
    # NOTE(review): duplicates verify_website_exists below — consider reusing it.
    website_result = await db.execute(
        select(Website).where(Website.id == website_id)
    )
    website = website_result.scalar_one_or_none()
    if website is None:
        raise WebsiteNotFoundError(f"Website {website_id} not found")
    api_base = await get_wordpress_api_base(website)
    url = f"{api_base}/wp/v2/users/me"
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
    }
    # Short timeouts: this sits on the interactive auth path.
    timeout = httpx.Timeout(10.0, connect=5.0)
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.get(url, headers=headers)
            # Map well-known status codes to specific exception types so
            # callers can distinguish auth failure / throttling / outage.
            if response.status_code == 401:
                raise WordPressTokenInvalidError("Invalid or expired WordPress token")
            if response.status_code == 429:
                raise WordPressRateLimitError("WordPress API rate limit exceeded")
            if response.status_code == 503:
                raise WordPressAPIError("WordPress API service unavailable")
            if response.status_code != 200:
                raise WordPressAPIError(
                    f"WordPress API error: {response.status_code} - {response.text}"
                )
            data = response.json()
            # Verify the authenticated user is the one the caller claimed.
            response_user_id = str(data.get("id", ""))
            if response_user_id != str(wp_user_id):
                # Mismatch is a soft failure (returns None), not an exception.
                logger.warning(
                    f"User ID mismatch: expected {wp_user_id}, got {response_user_id}"
                )
                return None
            # Extract user info
            user_info = WordPressUserInfo(
                wp_user_id=response_user_id,
                username=data.get("username", ""),
                email=data.get("email", ""),
                display_name=data.get("name", ""),
                roles=data.get("roles", []),
                raw_data=data,
            )
            return user_info
    except httpx.TimeoutException:
        raise WordPressAPIError("WordPress API request timed out")
    except httpx.ConnectError:
        raise WordPressAPIError("Unable to connect to WordPress API")
    except httpx.HTTPError as e:
        raise WordPressAPIError(f"HTTP error communicating with WordPress: {str(e)}")
async def fetch_wordpress_users(
    website: Website,
    admin_token: str,
    page: int = 1,
    per_page: int = 100,
) -> list[dict[str, Any]]:
    """
    Fetch one page of users from the WordPress REST API.

    Requires an admin-level JWT token; calls GET /wp/v2/users with
    context=edit so full user records are returned.

    Args:
        website: Website model instance
        admin_token: WordPress admin JWT token
        page: Page number for pagination
        per_page: Number of users per page (capped at 100)

    Returns:
        List of WordPress user data dictionaries

    Raises:
        WordPressTokenInvalidError: If the admin token is invalid (401) or
            lacks permission to list users (403)
        WordPressRateLimitError: If rate limited (429)
        WordPressAPIError: If the API is unreachable or errors out
    """
    endpoint = f"{await get_wordpress_api_base(website)}/wp/v2/users"
    request_headers = {
        "Authorization": f"Bearer {admin_token}",
        "Accept": "application/json",
    }
    query_params = {
        "page": page,
        "per_page": min(per_page, 100),
        "context": "edit",  # Get full user data
    }
    # Generous timeouts: bulk sync is a background concern, not interactive.
    try:
        async with httpx.AsyncClient(
            timeout=httpx.Timeout(30.0, connect=10.0)
        ) as client:
            response = await client.get(
                endpoint, headers=request_headers, params=query_params
            )
    except httpx.TimeoutException:
        raise WordPressAPIError("WordPress API request timed out")
    except httpx.ConnectError:
        raise WordPressAPIError("Unable to connect to WordPress API")
    except httpx.HTTPError as e:
        raise WordPressAPIError(f"HTTP error communicating with WordPress: {str(e)}")
    # Translate well-known status codes into typed exceptions.
    status = response.status_code
    if status == 401:
        raise WordPressTokenInvalidError("Invalid admin token for user sync")
    if status == 403:
        raise WordPressTokenInvalidError(
            "Admin token lacks permission to list users"
        )
    if status == 429:
        raise WordPressRateLimitError("WordPress API rate limit exceeded")
    if status == 503:
        raise WordPressAPIError("WordPress API service unavailable")
    if status != 200:
        raise WordPressAPIError(
            f"WordPress API error: {status} - {response.text}"
        )
    return response.json()
async def sync_wordpress_users(
    website_id: int,
    admin_token: str,
    db: AsyncSession,
) -> SyncStats:
    """
    Synchronize users from WordPress to the local database.

    Fetches all users from the WordPress API (paginated) and performs an
    upsert keyed on (wp_user_id, website_id):
    - Existing users get their updated_at timestamp refreshed
    - Unknown users are inserted

    NOTE(review): updates only touch updated_at — no WordPress profile
    fields are mirrored into the local User row; presumably the local model
    stores only the ID mapping. Confirm against the User model.

    Args:
        website_id: Website identifier for multi-site isolation
        admin_token: WordPress admin JWT token
        db: Async database session

    Returns:
        SyncStats with insertion/update/error counts
        (total = inserted + updated; errors are counted separately)

    Raises:
        WebsiteNotFoundError: If website_id doesn't exist
        WordPressTokenInvalidError: If admin token is invalid
        WordPressAPIError: If API is unreachable
    """
    # Verify website exists
    website_result = await db.execute(
        select(Website).where(Website.id == website_id)
    )
    website = website_result.scalar_one_or_none()
    if website is None:
        raise WebsiteNotFoundError(f"Website {website_id} not found")
    # Load all existing users for this website once, keyed by wp_user_id,
    # so the per-user loop below needs no additional queries.
    existing_users_result = await db.execute(
        select(User).where(User.website_id == website_id)
    )
    existing_users = {
        str(user.wp_user_id): user
        for user in existing_users_result.scalars().all()
    }
    # Fetch users from WordPress (with pagination)
    all_wp_users = []
    page = 1
    per_page = 100
    while True:
        wp_users = await fetch_wordpress_users(
            website, admin_token, page, per_page
        )
        if not wp_users:
            break
        all_wp_users.extend(wp_users)
        # A short page means this was the last one — stop paginating.
        if len(wp_users) < per_page:
            break
        page += 1
    # Sync users; per-user failures are counted, never fatal.
    inserted = 0
    updated = 0
    errors = 0
    for wp_user in all_wp_users:
        try:
            wp_user_id = str(wp_user.get("id", ""))
            if not wp_user_id:
                # Record without an ID cannot be keyed — skip it.
                errors += 1
                continue
            if wp_user_id in existing_users:
                # Update existing user (timestamp update only; see NOTE above)
                existing_user = existing_users[wp_user_id]
                existing_user.updated_at = datetime.now(timezone.utc)
                updated += 1
            else:
                # Insert new user
                new_user = User(
                    wp_user_id=wp_user_id,
                    website_id=website_id,
                    created_at=datetime.now(timezone.utc),
                    updated_at=datetime.now(timezone.utc),
                )
                db.add(new_user)
                inserted += 1
        except Exception as e:
            logger.error(f"Error syncing user {wp_user.get('id')}: {e}")
            errors += 1
    # One commit for the whole sync batch.
    await db.commit()
    total = inserted + updated
    logger.info(
        f"WordPress user sync complete for website {website_id}: "
        f"{inserted} inserted, {updated} updated, {errors} errors"
    )
    return SyncStats(
        inserted=inserted,
        updated=updated,
        total=total,
        errors=errors,
    )
async def get_wordpress_user(
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
) -> Optional[User]:
    """
    Look up a locally-stored user by WordPress user ID within one website.

    Args:
        wp_user_id: WordPress user ID
        website_id: Website identifier for multi-site isolation
        db: Async database session

    Returns:
        The matching User, or None when no such user exists locally.
    """
    query = select(User).where(
        User.wp_user_id == wp_user_id,
        User.website_id == website_id,
    )
    return (await db.execute(query)).scalar_one_or_none()
async def verify_website_exists(
    website_id: int,
    db: AsyncSession,
) -> Website:
    """
    Load a website row, raising when it is missing.

    Args:
        website_id: Website identifier
        db: Async database session

    Returns:
        Website model instance

    Raises:
        WebsiteNotFoundError: If the website doesn't exist
    """
    site = (
        await db.execute(select(Website).where(Website.id == website_id))
    ).scalar_one_or_none()
    if site is None:
        raise WebsiteNotFoundError(f"Website {website_id} not found")
    return site
async def get_or_create_user(
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
) -> User:
    """
    Return the local user for (wp_user_id, website_id), creating one if absent.

    NOTE(review): the get-then-insert sequence is not atomic; concurrent
    calls could both attempt the insert — presumably a DB unique constraint
    guards this. Confirm against the User table schema.

    Args:
        wp_user_id: WordPress user ID
        website_id: Website identifier
        db: Async database session

    Returns:
        User model instance (existing or freshly created)
    """
    found = await get_wordpress_user(wp_user_id, website_id, db)
    if found is not None:
        return found
    # No local record yet: create, persist, and re-read generated fields.
    created = User(
        wp_user_id=wp_user_id,
        website_id=website_id,
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
    )
    db.add(created)
    await db.commit()
    await db.refresh(created)
    return created