From cf193d7ea07048ea1d05a47771c99cbb158bd4a7 Mon Sep 17 00:00:00 2001 From: Dwindi Ramadhana Date: Sat, 21 Mar 2026 23:32:59 +0700 Subject: [PATCH] first commit --- .claude/settings.local.json | 6 + .env.example | 31 + .gitignore | 30 + AAPANEL_DEPLOYMENT.md | 952 +++++++++++++++++++ PRD.md | 746 +++++++++++++++ TEST.md | 1395 +++++++++++++++++++++++++++ alembic.ini | 147 +++ alembic/README | 1 + alembic/env.py | 99 ++ alembic/script.py.mako | 28 + app/__init__.py | 7 + app/admin.py | 625 +++++++++++++ app/api/__init__.py | 5 + app/api/v1/__init__.py | 25 + app/api/v1/session.py | 388 ++++++++ app/core/__init__.py | 3 + app/core/config.py | 115 +++ app/database.py | 85 ++ app/main.py | 204 ++++ app/models/__init__.py | 25 + app/models/item.py | 222 +++++ app/models/session.py | 193 ++++ app/models/tryout.py | 184 ++++ app/models/tryout_stats.py | 151 +++ app/models/user.py | 72 ++ app/models/user_answer.py | 137 +++ app/models/website.py | 69 ++ app/routers/__init__.py | 13 + app/routers/admin.py | 249 +++++ app/routers/ai.py | 292 ++++++ app/routers/import_export.py | 324 +++++++ app/routers/normalization.py | 279 ++++++ app/routers/reports.py | 792 ++++++++++++++++ app/routers/sessions.py | 402 ++++++++ app/routers/tryouts.py | 458 +++++++++ app/routers/wordpress.py | 384 ++++++++ app/schemas/__init__.py | 65 ++ app/schemas/ai.py | 102 ++ app/schemas/report.py | 264 ++++++ app/schemas/session.py | 108 +++ app/schemas/tryout.py | 97 ++ app/schemas/wordpress.py | 86 ++ app/services/__init__.py | 155 +++ app/services/ai_generation.py | 595 ++++++++++++ app/services/cat_selection.py | 702 ++++++++++++++ app/services/config_management.py | 431 +++++++++ app/services/ctt_scoring.py | 385 ++++++++ app/services/excel_import.py | 521 +++++++++++ app/services/irt_calibration.py | 1124 ++++++++++++++++++++++ app/services/normalization.py | 538 +++++++++++ app/services/reporting.py | 1449 +++++++++++++++++++++++++++++ app/services/wordpress_auth.py | 456 +++++++++ 
handoff.md | 96 ++ irt_1pl_mle.py | 135 +++ project-brief.md | 1109 ++++++++++++++++++++++ requirements.txt | 40 + tests/test_normalization.py | 275 ++++++ 57 files changed, 17871 insertions(+) create mode 100644 .claude/settings.local.json create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 AAPANEL_DEPLOYMENT.md create mode 100644 PRD.md create mode 100644 TEST.md create mode 100644 alembic.ini create mode 100644 alembic/README create mode 100644 alembic/env.py create mode 100644 alembic/script.py.mako create mode 100644 app/__init__.py create mode 100644 app/admin.py create mode 100644 app/api/__init__.py create mode 100644 app/api/v1/__init__.py create mode 100644 app/api/v1/session.py create mode 100644 app/core/__init__.py create mode 100644 app/core/config.py create mode 100644 app/database.py create mode 100644 app/main.py create mode 100644 app/models/__init__.py create mode 100644 app/models/item.py create mode 100644 app/models/session.py create mode 100644 app/models/tryout.py create mode 100644 app/models/tryout_stats.py create mode 100644 app/models/user.py create mode 100644 app/models/user_answer.py create mode 100644 app/models/website.py create mode 100644 app/routers/__init__.py create mode 100644 app/routers/admin.py create mode 100644 app/routers/ai.py create mode 100644 app/routers/import_export.py create mode 100644 app/routers/normalization.py create mode 100644 app/routers/reports.py create mode 100644 app/routers/sessions.py create mode 100644 app/routers/tryouts.py create mode 100644 app/routers/wordpress.py create mode 100644 app/schemas/__init__.py create mode 100644 app/schemas/ai.py create mode 100644 app/schemas/report.py create mode 100644 app/schemas/session.py create mode 100644 app/schemas/tryout.py create mode 100644 app/schemas/wordpress.py create mode 100644 app/services/__init__.py create mode 100644 app/services/ai_generation.py create mode 100644 app/services/cat_selection.py create mode 100644 
app/services/config_management.py create mode 100644 app/services/ctt_scoring.py create mode 100644 app/services/excel_import.py create mode 100644 app/services/irt_calibration.py create mode 100644 app/services/normalization.py create mode 100644 app/services/reporting.py create mode 100644 app/services/wordpress_auth.py create mode 100644 handoff.md create mode 100644 irt_1pl_mle.py create mode 100644 project-brief.md create mode 100644 requirements.txt create mode 100644 tests/test_normalization.py diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..121886a --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,6 @@ +{ + "enabledMcpjsonServers": [ + "brave-search" + ], + "enableAllProjectMcpServers": true +} diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..545f7ca --- /dev/null +++ b/.env.example @@ -0,0 +1,31 @@ +# Database +DATABASE_URL=postgresql+asyncpg://user:password@localhost:5432/irt_bank_soal +DB_HOST=localhost +DB_PORT=5432 +DB_NAME=irt_bank_soal +DB_USER=postgres +DB_PASSWORD=your_password_here + +# FastAPI +SECRET_KEY=your-secret-key-here-change-in-production +API_V1_STR=/api/v1 +PROJECT_NAME=IRT Bank Soal +ENVIRONMENT=development + +# OpenRouter (AI Generation) +OPENROUTER_API_KEY=your-openrouter-api-key-here +OPENROUTER_MODEL_QWEN=qwen/qwen-2.5-coder-32b-instruct +OPENROUTER_MODEL_LLAMA=meta-llama/llama-3.3-70b-instruct +OPENROUTER_TIMEOUT=30 + +# WordPress Integration +WORDPRESS_API_URL=https://your-wordpress-site.com/wp-json +WORDPRESS_AUTH_TOKEN=your-wordpress-jwt-token + +# Redis (Celery) +REDIS_URL=redis://localhost:6379/0 +CELERY_BROKER_URL=redis://localhost:6379/0 +CELERY_RESULT_BACKEND=redis://localhost:6379/0 + +# CORS +ALLOWED_ORIGINS=https://site1.com,https://site2.com,https://site3.com diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..143a34a --- /dev/null +++ b/.gitignore @@ -0,0 +1,30 @@ +*.pyc +__pycache__/ +*.py[cod] 
+*$py.class +.env +.venv/ +venv/ +ENV/ +env/ +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +.pytest_cache/ +.coverage +htmlcov/ +.DS_Store diff --git a/AAPANEL_DEPLOYMENT.md b/AAPANEL_DEPLOYMENT.md new file mode 100644 index 0000000..3cd1f39 --- /dev/null +++ b/AAPANEL_DEPLOYMENT.md @@ -0,0 +1,952 @@ +# IRT Bank Soal - AaPanel Deployment Guide + +**Document Version:** 1.1 +**Date:** March 21, 2026 +**Project:** IRT-Powered Adaptive Question Bank System v1.2.0 +**Updated:** Clarified PostgreSQL setup using Databases > PgSQL menu + +--- + +## Table of Contents + +1. [Prerequisites](#1-prerequisites) +2. [AaPanel Installation](#2-aapanel-installation) +3. [Install Required Software via AaPanel](#3-install-required-software-via-aapanel) +4. [PostgreSQL Setup](#4-postgresql-setup) +5. [Python Manager Setup](#5-python-manager-setup) +6. [Project Deployment](#6-project-deployment) +7. [Environment Configuration](#7-environment-configuration) +8. [Database Migration](#8-database-migration) +9. [Running the Application](#9-running-the-application) +10. [Nginx Reverse Proxy Configuration](#10-nginx-reverse-proxy-configuration) +11. [SSL Configuration](#11-ssl-configuration) +12. [Post-Deployment Verification](#12-post-deployment-verification) +13. [Troubleshooting](#13-troubleshooting) + +--- + +## 1. Prerequisites + +### Server Requirements + +| Requirement | Minimum | Recommended | +|-------------|---------|-------------| +| OS | Ubuntu 20.04 / CentOS 7+ | Ubuntu 22.04 LTS | +| RAM | 2 GB | 4 GB+ | +| Storage | 20 GB | 50 GB+ | +| CPU | 1 vCPU | 2+ vCPU | + +### Domain Requirements + +- A domain name pointed to your server IP +- Subdomain recommended (e.g., `api.yourdomain.com`) + +--- + +## 2. 
AaPanel Installation + +### Step 2.1: Install AaPanel + +**For Ubuntu/Debian:** + +```bash +# Login to your server via SSH +ssh root@your-server-ip + +# Install AaPanel +wget -O install.sh http://www.aapanel.com/script/install-ubuntu_6.0_en.sh && bash install.sh +``` + +**For CentOS:** + +```bash +# Install AaPanel +yum install -y wget && wget -O install.sh http://www.aapanel.com/script/install_6.0_en.sh && sh install.sh +``` + +### Step 2.2: Access AaPanel + +1. After installation completes, note the panel URL and credentials +2. Access AaPanel via browser: `http://your-server-ip:8888` +3. Login with provided credentials +4. **Important:** Change default port and password after first login + +--- + +## 3. Install Required Software via AaPanel + +### Step 3.1: Install Nginx + +1. In AaPanel, go to **App Store** +2. Find **Nginx** and click **Install** +3. Select version (recommended: 1.24+) +4. Click **Submit** and wait for installation + +### Step 3.2: Install Python Manager + +1. Go to **App Store** +2. Search for **Python Manager** (or **PM2 Manager**) +3. Click **Install** + +### Step 3.3: Install Redis (Optional, for Celery) + +1. Go to **App Store** +2. Find **Redis** and click **Install** +3. Click **Submit** + +--- + +## 4. PostgreSQL Setup + +> **IMPORTANT:** Use **Databases > PgSQL** menu from AaPanel sidebar. +> +> This menu supports both: +> - **Local server** - PostgreSQL installed on your AaPanel server +> - **Remote server** - External PostgreSQL (Supabase, Neon, AWS RDS, etc.) + +### Step 4.1: Choose Your Database Type + +You have two options: + +| Option | Description | Best For | +|--------|-------------|----------| +| **Remote Database** | External PostgreSQL service (Supabase, Neon, etc.) 
| Easy setup, managed, free tier available | +| **Local Database** | PostgreSQL on your AaPanel server | Full control, no external dependency | + +--- + +### Option A: Remote PostgreSQL Database (RECOMMENDED) + +Use an external PostgreSQL service: +- **Supabase** - https://supabase.com (free tier: 500MB) +- **Neon** - https://neon.tech (free tier: 3GB) +- **AWS RDS** - https://aws.amazon.com/rds/postgresql/ +- **DigitalOcean** - https://www.digitalocean.com/products/managed-databases-postgresql +- **Railway** - https://railway.app + +#### Step 4.A.1: Create Database on Provider + +1. Sign up on your chosen provider +2. Create a new PostgreSQL project/database +3. Note down the connection details from dashboard: + - **Host** (e.g., `db.xxxxx.supabase.co` or `ep-xxx.us-east-2.aws.neon.tech`) + - **Port** (usually `5432`, Supabase uses `6543` for pooler) + - **Database name** (e.g., `postgres` or `neondb`) + - **Username** (e.g., `postgres.xxxxx`) + - **Password** + +#### Step 4.A.2: Add Remote Server to AaPanel PgSQL + +1. In AaPanel, go to **Databases** > **PgSQL** +2. Click **Remote DB** button +3. Fill in the form: + - **Server Name:** `my-remote-db` (any name you like) + - **Server Address:** `db.xxxxx.supabase.co` (your host) + - **Port:** `5432` or `6543` (check your provider) + - **Root User:** `postgres` or your username + - **Root Password:** your password +4. Click **Submit** + +#### Step 4.A.3: Sync Databases from Remote Server + +1. After adding remote server, click **Get DB from server** +2. Select your remote server from dropdown +3. Click **Submit** +4. 
Your remote databases will appear in the list + +#### Step 4.A.4: Note Your Connection String + +Your connection string format: +``` +postgresql+asyncpg://username:password@host:port/database_name +``` + +**Example (Supabase):** +``` +postgresql+asyncpg://postgres.xxxxx:YourPassword@aws-0-ap-southeast-1.pooler.supabase.com:6543/postgres +``` + +**Example (Neon):** +``` +postgresql+asyncpg://neondb_owner:YourPassword@ep-xxxx.us-east-2.aws.neon.tech/neondb?sslmode=require +``` + +--- + +### Option B: Local PostgreSQL Database + +Install PostgreSQL directly on your AaPanel server. + +#### Step 4.B.1: Install PostgreSQL via Terminal + +```bash +# SSH into your server +ssh root@your-server-ip + +# Ubuntu/Debian +apt update +apt install -y postgresql postgresql-contrib + +# Start and enable PostgreSQL +systemctl start postgresql +systemctl enable postgresql + +# Check status +systemctl status postgresql +``` + +#### Step 4.B.2: Create Database and User via Terminal + +```bash +# Switch to postgres user +su - postgres + +# Enter PostgreSQL CLI +psql + +# Run SQL commands: +CREATE DATABASE irt_bank_soal; + +CREATE USER irt_user WITH ENCRYPTED PASSWORD 'your_secure_password_here'; + +GRANT ALL PRIVILEGES ON DATABASE irt_bank_soal TO irt_user; + +# Connect to database and grant schema +\c irt_bank_soal +GRANT ALL ON SCHEMA public TO irt_user; + +# Exit +\q +exit +``` + +#### Step 4.B.3: Add Local Server to AaPanel PgSQL + +1. In AaPanel, go to **Databases** > **PgSQL** +2. Click **Root Password** to view/change postgres password +3. If your local PostgreSQL is not showing, click **Get DB from server** +4. Select **Local server** +5. Click **Submit** + +#### Step 4.B.4: Create Additional Database via AaPanel (Optional) + +1. In **Databases** > **PgSQL** +2. Click **Add DB** +3. Fill in: + - **Database name:** `irt_bank_soal` + - **Username:** `irt_user` (or same as DB name) + - **Password:** (click generate or enter custom) + - **Add to:** `Local server` +4. 
Click **Submit** + +#### Step 4.B.5: Note Your Connection String + +``` +postgresql+asyncpg://irt_user:your_password@127.0.0.1:5432/irt_bank_soal +``` + +--- + +## 4.1 Test Database Connection + +Before proceeding, verify your database connection works. + +### For Remote Database: + +```bash +# Install psql client if needed +apt install -y postgresql-client + +# Test connection (replace with your details) +psql "postgresql://username:password@host:port/database_name" -c "SELECT version();" +``` + +### For Local Database: + +```bash +# Test connection +psql -U irt_user -d irt_bank_soal -h 127.0.0.1 -c "SELECT version();" + +# If prompted for password, enter it +``` + +--- + +## 4.2 Connection String Quick Reference + +| Database Type | Connection String Format | +|---------------|-------------------------| +| **Remote (Supabase)** | `postgresql+asyncpg://postgres.xxxx:password@aws-0-region.pooler.supabase.com:6543/postgres` | +| **Remote (Neon)** | `postgresql+asyncpg://user:password@ep-xxxx.region.aws.neon.tech/neondb?sslmode=require` | +| **Local** | `postgresql+asyncpg://irt_user:password@127.0.0.1:5432/irt_bank_soal` | + +> **Note:** We use `postgresql+asyncpg://` because our app uses async SQLAlchemy with `asyncpg` driver. + +--- + +## 5. Python Manager Setup + +### Step 5.1: Open Python Manager + +1. In AaPanel, go to **App Store** +2. Find **Python Manager** and click **Settings** + +### Step 5.2: Install Python Version + +1. Click **Version Management** +2. Select **Python 3.11** (or latest stable) +3. Click **Install** +4. Wait for installation to complete + +--- + +## 6. Project Deployment + +### Step 6.1: Create Project Directory + +```bash +# Create project directory +mkdir -p /www/wwwroot/irt-bank-soal + +# Navigate to directory +cd /www/wwwroot/irt-bank-soal +``` + +### Step 6.2: Upload Project Files + +**Option A: Upload via File Manager** + +1. In AaPanel, go to **Files** +2. Navigate to `/www/wwwroot/irt-bank-soal` +3. 
Upload your project ZIP file +4. Extract the archive + +**Option B: Clone from Git (if applicable)** + +```bash +cd /www/wwwroot/irt-bank-soal + +# If using Git +git clone https://github.com/your-repo/irt-bank-soal.git . + +# Or copy from local +# scp -r /Users/dwindown/Applications/tryout-system/* root@your-server-ip:/www/wwwroot/irt-bank-soal/ +``` + +### Step 6.3: Verify Project Structure + +```bash +# Expected structure: +ls -la /www/wwwroot/irt-bank-soal/ +# app/ +# app/models/ +# app/routers/ +# app/services/ +# app/core/ +# tests/ +# requirements.txt +# .env.example +# alembic/ +``` + +--- + +## 7. Environment Configuration + +### Step 7.1: Create Virtual Environment via Python Manager + +1. In AaPanel **Python Manager**, click **Add Project** +2. Configure: + - **Project Name:** `irt-bank-soal` + - **Project Path:** `/www/wwwroot/irt-bank-soal` + - **Python Version:** `Python 3.11` + - **Framework:** `FastAPI` + - **Startup Method:** `uvicorn` +3. Click **Submit** + +### Step 7.2: Create Environment File + +```bash +# Copy example file +cp /www/wwwroot/irt-bank-soal/.env.example /www/wwwroot/irt-bank-soal/.env + +# Edit .env file +nano /www/wwwroot/irt-bank-soal/.env +``` + +### Step 7.3: Configure .env File + +```env +# Database Configuration +# For Remote Database (Supabase example): +# DATABASE_URL=postgresql+asyncpg://postgres.xxxx:password@aws-0-ap-southeast-1.pooler.supabase.com:6543/postgres +# For Remote Database (Neon example): +# DATABASE_URL=postgresql+asyncpg://neondb_owner:password@ep-xxxx.us-east-2.aws.neon.tech/neondb?sslmode=require +# For Local Database: +DATABASE_URL=postgresql+asyncpg://irt_user:your_secure_password_here@127.0.0.1:5432/irt_bank_soal + +# Security +SECRET_KEY=your-production-secret-key-min-32-characters-random-string + +# Environment +ENVIRONMENT=production +DEBUG=false + +# API Configuration +API_V1_STR=/api/v1 +PROJECT_NAME=IRT Bank Soal +PROJECT_VERSION=1.2.0 + +# CORS - Add your WordPress domains 
+ALLOWED_ORIGINS=https://yourdomain.com,https://www.yourdomain.com + +# OpenRouter API (for AI Generation) +OPENROUTER_API_KEY=your-openrouter-api-key-here +OPENROUTER_API_URL=https://openrouter.ai/api/v1 +OPENROUTER_MODEL_QWEN=qwen/qwen-2.5-coder-32b-instruct +OPENROUTER_MODEL_LLAMA=meta-llama/llama-3.3-70b-instruct +OPENROUTER_TIMEOUT=60 + +# WordPress Integration +WORDPRESS_API_URL=https://yourdomain.com/wp-json +WORDPRESS_AUTH_TOKEN=your-wordpress-jwt-token + +# Redis (for Celery task queue) +REDIS_URL=redis://127.0.0.1:6379/0 + +# Admin Panel +ADMIN_USER=admin +ADMIN_PASSWORD=your-secure-admin-password + +# Normalization Defaults +DEFAULT_RATAAN=500 +DEFAULT_SB=100 +MIN_SAMPLE_FOR_DYNAMIC=100 +``` + +### Step 7.4: Generate Secret Key + +```bash +# Generate a secure secret key +python3 -c "import secrets; print(secrets.token_urlsafe(32))" + +# Copy the output and paste into SECRET_KEY in .env +``` + +--- + +## 8. Database Migration + +### Step 8.1: Activate Virtual Environment + +```bash +# Via Python Manager, the venv is usually at: +source /www/wwwroot/irt-bank-soal/venv/bin/activate + +# Or check Python Manager for exact venv path +``` + +### Step 8.2: Install Dependencies + +```bash +# Ensure you're in project directory +cd /www/wwwroot/irt-bank-soal + +# Install dependencies +pip install -r requirements.txt + +# Verify installation +pip list | grep -E "fastapi|sqlalchemy|numpy|scipy|httpx|openpyxl" +``` + +### Step 8.3: Run Database Migrations (First Time Setup) + +```bash +# NOTE: alembic.ini and the alembic/ directory already ship with this project, +# so do NOT run `alembic init alembic` here — it would fail on the existing +# directory (and overwrite the project's configured alembic/env.py). +# Proceed directly to generating and applying the migration: + +# Generate initial migration +alembic revision --autogenerate -m "Initial migration" + +# Apply migration +alembic upgrade head +``` + +### Step 8.4: Verify Database Tables + +```bash +# Check tables were created +psql -U irt_user -d irt_bank_soal -h 127.0.0.1 -c "\dt" + +# Expected output: websites, users, tryouts, items, sessions, user_answers, tryout_stats +``` + +--- + +## 9. 
Running the Application + +### Step 9.1: Configure Python Project in AaPanel + +1. In **Python Manager**, find your project `irt-bank-soal` +2. Click **Settings** +3. Configure startup: + - **Startup File:** `app/main.py` + - **Startup Method:** `uvicorn` + - **Port:** `8000` + - **Modules:** `uvicorn[standard]` + +### Step 9.2: Set Startup Command + +In Python Manager settings, set the startup command: + +```bash +# Startup command +uvicorn app.main:app --host 127.0.0.1 --port 8000 --workers 4 + +# Or for development: +uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload +``` + +### Step 9.3: Start the Application + +1. In Python Manager, click **Start** on your project +2. Check logs for any errors +3. Verify the application is running: + +```bash +# Test health endpoint +curl http://127.0.0.1:8000/ + +# Expected response: +# {"status": "healthy", "project_name": "IRT Bank Soal", "version": "1.2.0"} +``` + +### Step 9.4: Configure Auto-Start on Boot + +1. In Python Manager, enable **Auto-start on boot** +2. Or manually via terminal: + +```bash +# Using systemd (create service file) +nano /etc/systemd/system/irt-bank-soal.service +``` + +```ini +[Unit] +Description=IRT Bank Soal FastAPI Application +After=network.target +# Uncomment below if using LOCAL PostgreSQL: +# After=network.target postgresql.service + +[Service] +Type=simple +User=www +Group=www +WorkingDirectory=/www/wwwroot/irt-bank-soal +Environment="PATH=/www/wwwroot/irt-bank-soal/venv/bin" +ExecStart=/www/wwwroot/irt-bank-soal/venv/bin/uvicorn app.main:app --host 127.0.0.1 --port 8000 --workers 4 +Restart=always +RestartSec=5 + +[Install] +WantedBy=multi-user.target +``` + +```bash +# Enable and start service +systemctl daemon-reload +systemctl enable irt-bank-soal +systemctl start irt-bank-soal +systemctl status irt-bank-soal +``` + +--- + +## 10. Nginx Reverse Proxy Configuration + +### Step 10.1: Create Website in AaPanel + +1. In AaPanel, go to **Website** +2. Click **Add Site** +3. 
Configure: + - **Domain:** `api.yourdomain.com` (or your subdomain) + - **PHP Version:** Pure Static (not needed) + - **Database:** None (already created) +4. Click **Submit** + +### Step 10.2: Configure Reverse Proxy + +1. Click **Settings** on the newly created website +2. Go to **Reverse Proxy** +3. Click **Add Reverse Proxy** +4. Configure: + - **Proxy Name:** `irt-api` + - **Target URL:** `http://127.0.0.1:8000` +5. Click **Submit** + +### Step 10.3: Manual Nginx Configuration (Alternative) + +```bash +# Edit Nginx config +nano /www/server/panel/vhost/nginx/api.yourdomain.com.conf +``` + +```nginx +server { + listen 80; + server_name api.yourdomain.com; + + # Access and error logs + access_log /www/wwwlogs/api.yourdomain.com.log; + error_log /www/wwwlogs/api.yourdomain.com.error.log; + + # Client body size (for Excel uploads) + client_max_body_size 50M; + + # Proxy to FastAPI + location / { + proxy_pass http://127.0.0.1:8000; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_connect_timeout 60s; + proxy_send_timeout 60s; + proxy_read_timeout 60s; + } + + # Static files (if any) + location /static/ { + alias /www/wwwroot/irt-bank-soal/static/; + expires 30d; + } +} +``` + +### Step 10.4: Test and Reload Nginx + +```bash +# Test Nginx configuration +nginx -t + +# Reload Nginx +nginx -s reload + +# Or via AaPanel: Website > Settings > Config > Save +``` + +--- + +## 11. SSL Configuration + +### Step 11.1: Install SSL Certificate + +1. In AaPanel, go to **Website** +2. Click **Settings** on your site +3. Go to **SSL** +4. Choose method: + - **Let's Encrypt:** Free, auto-renewal + - **Own Certificate:** Upload your own + - **Buy:** Purchase through AaPanel + +### Step 11.2: Configure Let's Encrypt + +1. 
Click **Let's Encrypt** +2. Enter your email +3. Select domain `api.yourdomain.com` +4. Click **Apply** +5. Enable **Force HTTPS** + +### Step 11.3: Update .env for HTTPS + +```bash +# Edit .env +nano /www/wwwroot/irt-bank-soal/.env + +# Update CORS to use HTTPS +ALLOWED_ORIGINS=https://yourdomain.com,https://www.yourdomain.com +``` + +--- + +## 12. Post-Deployment Verification + +### Step 12.1: Test API Endpoints + +```bash +# Test health endpoint +curl https://api.yourdomain.com/ + +# Test detailed health +curl https://api.yourdomain.com/health + +# Test API documentation +# Open in browser: https://api.yourdomain.com/docs +``` + +### Step 12.2: Test Database Connection + +```bash +# Via API +curl https://api.yourdomain.com/health + +# Expected response includes database status: +# {"status": "healthy", "database": "connected", "api_version": "v1"} +``` + +### Step 12.3: Test Admin Panel + +```bash +# Access admin panel +# Open in browser: https://api.yourdomain.com/admin +# Login with credentials from .env +``` + +### Step 12.4: Load Test Data (Optional) + +```bash +# SSH into server +ssh root@your-server-ip + +# Navigate to project +cd /www/wwwroot/irt-bank-soal + +# Activate venv +source venv/bin/activate + +# Run test data script +python3 -c " +import asyncio +from app.database import init_db +asyncio.run(init_db()) +print('Database initialized successfully') +" +``` + +--- + +## 13. 
Troubleshooting + +### Issue: Python Manager Not Starting Application + +**Solution:** + +```bash +# Check logs +tail -f /www/wwwroot/irt-bank-soal/logs/error.log + +# Check if port is in use +lsof -i :8000 + +# Manually test startup +cd /www/wwwroot/irt-bank-soal +source venv/bin/activate +uvicorn app.main:app --host 127.0.0.1 --port 8000 +``` + +### Issue: Database Connection Failed + +**For Remote Database:** + +```bash +# Test connection from server +apt install -y postgresql-client +psql "postgresql://username:password@remote-host:port/database" -c "SELECT 1;" + +# Check if firewall allows outbound connection +# Most remote DBs use port 5432 or 6543 + +# Verify DATABASE_URL in .env +cat /www/wwwroot/irt-bank-soal/.env | grep DATABASE_URL + +# Common issues: +# - Wrong port (Supabase pooler uses 6543, direct uses 5432) +# - Missing sslmode=require (Neon requires this) +# - IP not whitelisted (check provider dashboard) +``` + +**For Local Database:** + +```bash +# Check PostgreSQL status +systemctl status postgresql + +# Test connection manually +psql -U irt_user -d irt_bank_soal -h 127.0.0.1 -W + +# Check pg_hba.conf allows connections +cat /etc/postgresql/*/main/pg_hba.conf | grep -v "^#" | grep -v "^$" + +# Verify DATABASE_URL in .env +cat /www/wwwroot/irt-bank-soal/.env | grep DATABASE_URL +``` + +### Issue: 502 Bad Gateway + +**Solution:** + +```bash +# Check if FastAPI is running +ps aux | grep uvicorn + +# Check Nginx error logs +tail -f /www/wwwlogs/api.yourdomain.com.error.log + +# Verify proxy configuration +cat /www/server/panel/vhost/nginx/api.yourdomain.com.conf | grep proxy_pass +``` + +### Issue: CORS Errors + +**Solution:** + +```bash +# Check ALLOWED_ORIGINS in .env +cat /www/wwwroot/irt-bank-soal/.env | grep ALLOWED_ORIGINS + +# Ensure WordPress domain is included +# Example: ALLOWED_ORIGINS=https://site1.com,https://site2.com + +# Restart application after changes +# Via Python Manager: Stop > Start +``` + +### Issue: SSL Certificate Not 
Working + +**Solution:** + +```bash +# Check certificate +openssl s_client -connect api.yourdomain.com:443 + +# Force HTTPS in Nginx config +# Add to server block: +# return 301 https://$host$request_uri; + +# Reload Nginx +nginx -s reload +``` + +### Issue: Large File Upload Failed + +**Solution:** + +```bash +# Increase Nginx client body size +nano /www/server/panel/vhost/nginx/api.yourdomain.com.conf + +# Add/modify: +# client_max_body_size 100M; + +# Also check PHP settings if using PHP +# In AaPanel: PHP > Settings > Upload Max Filesize +``` + +--- + +## Quick Reference Commands + +```bash +# Application Management +systemctl start irt-bank-soal +systemctl stop irt-bank-soal +systemctl restart irt-bank-soal +systemctl status irt-bank-soal + +# Local Database Management (if using local PostgreSQL) +systemctl start postgresql +systemctl stop postgresql +systemctl restart postgresql +systemctl status postgresql + +# Nginx Management +nginx -t # Test config +nginx -s reload # Reload config +systemctl restart nginx # Restart Nginx + +# View Logs +tail -f /www/wwwlogs/api.yourdomain.com.log +tail -f /www/wwwlogs/api.yourdomain.com.error.log + +# Application Logs (if configured) +tail -f /www/wwwroot/irt-bank-soal/logs/app.log + +# Test Database Connection +# Local: +psql -U irt_user -d irt_bank_soal -h 127.0.0.1 -c "SELECT version();" +# Remote: +psql "postgresql://user:pass@host:port/db" -c "SELECT version();" +``` + +--- + +## Security Checklist + +- [ ] Changed AaPanel default port and password +- [ ] Database user has strong password +- [ ] SECRET_KEY is unique and 32+ characters +- [ ] SSL certificate installed and forced HTTPS +- [ ] CORS restricted to production domains only +- [ ] Firewall configured (only 80, 443, 22, 8888 open) +- [ ] Admin password is strong +- [ ] For local DB: PostgreSQL not exposed to internet +- [ ] For remote DB: IP whitelist configured (if supported) +- [ ] Regular backups configured + +--- + +## Backup Configuration + +### Database 
Backup + +**For Local Database:** + +```bash +# Create backup directory +mkdir -p /www/backup + +# Manual backup +pg_dump -U irt_user -h 127.0.0.1 irt_bank_soal > /www/backup/irt_bank_soal_$(date +%Y%m%d).sql + +# Automated backup (cron) +crontab -e +# Add: 0 2 * * * pg_dump -U irt_user -h 127.0.0.1 irt_bank_soal > /www/backup/irt_bank_soal_$(date +\%Y\%m\%d).sql +``` + +**For Remote Database:** + +Most managed PostgreSQL providers have built-in backup features: +- **Supabase:** Dashboard > Database > Backups (daily automatic) +- **Neon:** Automatic point-in-time recovery +- **AWS RDS:** Automated backups with retention period + +You can also backup manually: + +```bash +# Manual backup from remote (requires postgresql-client) +pg_dump "postgresql://username:password@host:port/database" > /www/backup/irt_bank_soal_$(date +%Y%m%d).sql + +# Or with SSL for providers like Neon +pg_dump "postgresql://username:password@host:port/database?sslmode=require" > /www/backup/irt_bank_soal_$(date +%Y%m%d).sql +``` + +### Project Backup + +```bash +# Backup project files +tar -czvf /www/backup/irt_project_$(date +%Y%m%d).tar.gz /www/wwwroot/irt-bank-soal + +# Exclude venv to save space +tar -czvf /www/backup/irt_project_$(date +%Y%m%d).tar.gz --exclude='venv' /www/wwwroot/irt-bank-soal +``` + +--- + +**Document End** + +**Status:** Ready for Deployment + +**Support:** Refer to TEST.md for testing procedures and PRD.md for requirements. 
diff --git a/PRD.md b/PRD.md new file mode 100644 index 0000000..1a477a5 --- /dev/null +++ b/PRD.md @@ -0,0 +1,746 @@ +# Product Requirements Document (PRD) +## IRT-Powered Adaptive Question Bank System + +**Document Version:** 1.1 +**Date:** March 21, 2026 (Updated) +**Product Name:** IRT Bank Soal (Adaptive Question Bank with AI Generation) +**Client:** Sejoli Tryout Multi-Website Platform +**Status:** Draft - Clarifications Incorporated + +--- + +## Changelog + +### v1.1 (March 21, 2026) +- Added **AI Generation**: 1 request = 1 question, no approval workflow +- Added **Admin Playground**: Admin can test AI generation without saving to DB +- Updated **Normalization Control**: Optional manual/automatic mode, system handles auto when sufficient data +- Updated **IRT → CTT Rollback**: Historical IRT scores preserved, CTT applied to new sessions only +- Removed **Admin Permissions/Role-based Access**: Not needed (each admin per site via WordPress) +- Updated **Custom Dashboards**: Use FastAPI Admin only (no custom dashboards) +- Added **AI Generation Toggle**: Global on/off switch for cost control +- Added **User-level Question Reuse**: Check if student already answered at difficulty level +- Updated **Student UX**: Admin sees internal metrics, students see only primary score +- Added **Data Retention**: Keep all data (no policy yet) +- Added **Reporting Section**: Student performance, Item analysis, Calibration status, Tryout comparison +- Updated **Admin Persona Note**: This project is backend tool for IRT/CTT calculation; WordPress handles static questions + +--- + +## 1. Product Vision + +### 1.1 Vision Statement +To provide an adaptive, intelligent question bank system that seamlessly integrates with Sejoli's existing Excel-based workflow while introducing modern Item Response Theory (IRT) capabilities and AI-powered question generation, enabling more accurate and efficient student assessment. 
+ +### 1.1.1 Primary Goals +- **100% Excel Compatibility**: Maintain exact formula compatibility with client's existing Excel workflow (CTT scoring with p, bobot, NM, NN) +- **Gradual Modernization**: Enable smooth transition from Classical Test Theory (CTT) to Item Response Theory (IRT) +- **Adaptive Assessment**: Provide Computerized Adaptive Testing (CAT) capabilities for more efficient and accurate measurement +- **AI-Enhanced Content**: Automatically generate question variants (Mudah/Sulit) from base Sedang questions +- **Multi-Site Support**: Single backend serving multiple WordPress-powered educational sites +- **Non-Destructive**: Zero disruption to existing operations - all enhancements are additive + +### 1.1.2 Success Metrics +- **Technical**: CTT scores match client Excel 100%, IRT calibration >80% coverage +- **Educational**: 30% reduction in test length with IRT vs CTT, measurement precision (SE < 0.5 after 15 items) +- **Adoption**: >70% tryouts use hybrid mode within 3 months, >80% student satisfaction with adaptive mode +- **Efficiency**: 99.9% question reuse rate via AI-generated variants + +--- + +## 2. 
User Personas + +### 2.1 Administrators (School/Guru) +**Profile:** Non-technical education professionals managing tryouts +**Pain Points:** +- Excel-based scoring is manual and time-consuming +- Static questions require constant new content creation +- Difficulty normalization requires manual calculation +- Limited ability to compare student performance across groups + +**Needs:** +- Simple, transparent scoring formulas (CTT mode) +- Easy Excel import/export workflow +- Clear visualizations of student performance +- Configurable normalization (static vs dynamic) +- Optional advanced features (IRT) without complexity + +### 2.2 Students +**Profile:** Students taking tryouts for assessment +**Pain Points:** +- Fixed-length tests regardless of ability level +- Question difficulty may not match their skill +- Long testing sessions with low-value questions + +**Needs:** +- Adaptive tests that match their ability level +- Shorter, more efficient assessment +- Clear feedback on strengths/weaknesses +- Consistent scoring across attempts + +### 2.3 Content Creators +**Profile:** Staff creating and managing question banks +**Pain Points:** +- Creating 3 difficulty variants per question is time-consuming +- Limited question pool for repeated assessments +- Manual categorization of difficulty levels + +**Needs:** +- AI-assisted question generation +- Easy difficulty level adjustment +- Reuse of base questions with variant generation +- Bulk question management tools + +### 2.4 Technical Administrators +**Profile:** IT staff managing the platform +**Pain Points:** +- Multiple WordPress sites with separate databases +- Difficulty scaling question pools +- Maintenance of complex scoring systems + +**Needs:** +- Centralized backend for multiple sites +- Scalable architecture (AA-panel VPS) +- REST API for WordPress integration +- Automated calibration and normalization +- **Note**: Each admin manages static questions within WordPress; this project provides the backend tool for 
IRT/CTT calculation and dynamic question selection + +--- + +## 3. Functional Requirements + +### 3.1 CTT Scoring (Classical Test Theory) +**FR-1.1** System must calculate tingkat kesukaran (p) per question using exact client Excel formula: +``` +p = Σ Benar / Total Peserta +``` +**Acceptance Criteria:** +- p-value calculated per question for each tryout +- Values stored in database (items.ctt_p) +- Results match client Excel to 4 decimal places + +**FR-1.2** System must calculate bobot (weight) per question: +``` +Bobot = 1 - p +``` +**Acceptance Criteria:** +- Bobot calculated and stored (items.ctt_bobot) +- Easy questions (p > 0.70) have low bobot (< 0.30) +- Difficult questions (p < 0.30) have high bobot (> 0.70) + +**FR-1.3** System must calculate Nilai Mentah (NM) per student: +``` +NM = (Total_Bobot_Siswa / Total_Bobot_Max) × 1000 +``` +**Acceptance Criteria:** +- NM ranges 0-1000 +- SUMPRODUCT equivalent implemented correctly +- Results stored per response (user_answers.ctt_nm) + +**FR-1.4** System must calculate Nilai Nasional (NN) with normalization: +``` +NN = 500 + 100 × ((NM - Rataan) / SB) +``` +**Acceptance Criteria:** +- NN normalized to mean=500, SD=100 +- Support static (hardcoded rataan/SB) and dynamic (real-time) modes +- NN clipped to 0-1000 range + +**FR-1.5** System must categorize question difficulty per CTT standards: +- p < 0.30 → Sukar (Sulit) +- 0.30 ≤ p ≤ 0.70 → Sedang +- p > 0.70 → Mudah +**Acceptance Criteria:** +- Category assigned (items.ctt_category) +- Used for level field (items.level) + +### 3.2 IRT Scoring (Item Response Theory) +**FR-2.1** System must implement 1PL Rasch model: +``` +P(θ) = 1 / (1 + e^-(θ - b)) +``` +**Acceptance Criteria:** +- θ (ability) estimated per student +- b (difficulty) calibrated per question +- Ranges: θ, b ∈ [-3, +3] + +**FR-2.2** System must estimate θ using Maximum Likelihood Estimation (MLE) +**Acceptance Criteria:** +- Initial guess θ = 0 +- Optimization bounds [-3, +3] +- Standard error (SE) 
calculated using Fisher information + +**FR-2.3** System must calibrate b parameters from response data +**Acceptance Criteria:** +- Minimum 100-500 responses per item for calibration +- Calibration status tracked (items.calibrated) +- Auto-convert CTT p to initial b: `b ≈ ln((1-p)/p)` + +**FR-2.4** System must map θ to NN for CTT comparison +**Acceptance Criteria:** +- θ ∈ [-3, +3] mapped to NN ∈ [0, 1000] +- Formula: `NN = 500 + (θ / 3) × 500` +- Secondary score returned in API responses + +### 3.3 Hybrid Mode +**FR-3.1** System must support dual scoring (CTT + IRT parallel) +**Acceptance Criteria:** +- Both scores calculated per response +- Primary/secondary score returned +- Admin can choose which to display + +**FR-3.2** System must support hybrid item selection +**Acceptance Criteria:** +- First N items: fixed order (CTT mode) +- Remaining items: adaptive (IRT mode) +- Configurable transition point (tryout_config.hybrid_transition_slot) + +**FR-3.3** System must support hybrid normalization +**Acceptance Criteria:** +- Static mode for small samples (< threshold) +- Dynamic mode for large samples (≥ threshold) +- Configurable threshold (tryout_config.min_sample_for_dynamic) + +### 3.4 Dynamic Normalization +**FR-4.1** System must maintain running statistics per tryout +**Acceptance Criteria:** +- Track: participant_count, total_nm_sum, total_nm_sq_sum +- Update on each completed session +- Stored in tryout_stats table + +**FR-4.2** System must calculate real-time rataan and SB +**Acceptance Criteria:** +- Rataan = mean(all NM) +- SB = sqrt(variance(all NM)) +- Updated incrementally (no full recalc) + +**FR-4.3** System must support optional normalization control (manual vs automatic) +**Acceptance Criteria:** +- Admin can choose manual mode (static normalization with hardcoded values) +- Admin can choose automatic mode (dynamic normalization when sufficient data) +- When automatic selected and sufficient data reached: system handles normalization
automatically +- Configurable threshold: min_sample_for_dynamic (default: 100) +- Admin can switch between manual/automatic at any time +- System displays current data readiness (participant count vs threshold) + +### 3.5 AI Question Generation +**FR-5.1** System must generate question variants via OpenRouter API +**Acceptance Criteria:** +- Generate Mudah variant from Sedang base +- Generate Sulit variant from Sedang base +- Generate same-level variant from Sedang base +- Use Qwen3 Coder 480B or Llama 3.3 70B +- **1 request = 1 question** (not batch generation) + +**FR-5.2** System must use standardized prompt template +**Acceptance Criteria:** +- Include context (tryout_id, slot, level) +- Include basis soal for reference (provides topic/context) +- Request 1 question with 4 options +- Include explanation +- Maintain same context, vary only difficulty level + +**FR-5.3** System must implement question reuse/caching with user-level tracking +**Acceptance Criteria:** +- Check DB for existing variant before generating +- Check if student user_id already answered question at specific difficulty level +- Reuse if found (same tryout_id, slot, level) +- Generate only if cache miss OR user hasn't answered at this difficulty + +**FR-5.4** System must provide admin playground for AI testing +**Acceptance Criteria:** +- Admin can request AI generation without saving to database +- Admin can re-request unlimited times until satisfied (no approval workflow) +- Preview mode shows generated question before saving +- Admin can edit content before saving +- Purpose: Build admin trust in AI quality before enabling for students + +**FR-5.5** System must parse and store AI-generated questions +**Acceptance Criteria:** +- Parse stem, options, correct answer, explanation +- Store in items table with generated_by='ai' +- Link to basis_item_id +- No approval workflow required for student tests + +**FR-5.6** System must support AI generation toggle +**Acceptance Criteria:** +- Global 
toggle to enable/disable AI generation (config.AI_generation_enabled) +- When disabled: reuse DB questions regardless of repetition +- When enabled: generate new variants if cache miss +- Admin can toggle on/off based on cost/budget + +### 3.6 Item Selection +**FR-6.1** System must support fixed order selection (CTT mode) +**Acceptance Criteria:** +- Items delivered in slot order (1, 2, 3, ...) +- No adaptive logic +- Used when selection_mode='fixed' + +**FR-6.2** System must support adaptive selection (IRT mode) +**Acceptance Criteria:** +- Select item where b ≈ current θ +- Prioritize calibrated items +- Use item information to maximize precision + +**FR-6.3** System must support level-based selection (hybrid mode) +**Acceptance Criteria:** +- Select from specified level (Mudah/Sedang/Sulit) +- Check if level variant exists in DB +- Generate via AI if not exists + +### 3.7 Excel Import +**FR-7.1** System must import from client Excel format +**Acceptance Criteria:** +- Parse answer key (Row 2, KUNCI) +- Extract calculated p-values (Row 4, data_only=True) +- Extract bobot values (Row 5) +- Import student responses (Row 6+) + +**FR-7.2** System must create items from Excel import +**Acceptance Criteria:** +- Create item per question slot +- Set ctt_p, ctt_bobot, ctt_category +- Auto-calculate irt_b from ctt_p +- Set calibrated=False + +**FR-7.3** System must configure tryout from Excel import +**Acceptance Criteria:** +- Create tryout_config with CTT settings +- Set normalization_mode='static' (default) +- Set static_rataan=500, static_sb=100 + +### 3.8 API Endpoints +**FR-8.1** System must provide Next Item endpoint +**Acceptance Criteria:** +- POST /api/v1/session/{session_id}/next_item +- Accept mode (ctt/irt/hybrid) +- Accept current_responses array +- Return item with selection_method metadata + +**FR-8.2** System must provide Complete Session endpoint +**Acceptance Criteria:** +- POST /api/v1/session/{session_id}/complete +- Return primary_score (CTT or IRT) 
+- Return secondary_score (parallel calculation) +- Return comparison (NN difference, agreement) + +**FR-8.3** System must provide Get Tryout Config endpoint +**Acceptance Criteria:** +- GET /api/v1/tryout/{tryout_id}/config +- Return scoring_mode, normalization_mode +- Return current_stats (participant_count, rataan, SB) +- Return calibration_status + +**FR-8.4** System must provide Update Normalization endpoint +**Acceptance Criteria:** +- PUT /api/v1/tryout/{tryout_id}/normalization +- Accept normalization_mode update +- Accept static_rataan, static_sb overrides +- Return will_switch_to_dynamic_at threshold + +### 3.9 Multi-Site Support +**FR-9.1** System must support multiple WordPress sites +**Acceptance Criteria:** +- Each site has unique website_id +- Shared backend, isolated data per site +- API responses scoped to website_id + +**FR-9.2** System must support per-site configuration +**Acceptance Criteria:** +- Each (website_id, tryout_id) pair unique +- Independent tryout_config per tryout +- Independent tryout_stats per tryout + +--- + +## 4. 
Non-Functional Requirements + +### 4.1 Performance +**NFR-4.1.1** Next Item API response time < 500ms +**NFR-4.1.2** Complete Session API response time < 2s +**NFR-4.1.3** AI question generation < 10s (OpenRouter timeout) +**NFR-4.1.4** Support 1000 concurrent students + +### 4.2 Scalability +**NFR-4.2.1** Support 10,000+ items in database +**NFR-4.2.2** Support 100,000+ student responses +**NFR-4.2.3** Question reuse: 99.9% cache hit rate after initial generation +**NFR-4.2.4** Horizontal scaling via PostgreSQL read replicas + +### 4.3 Reliability +**NFR-4.3.1** 99.9% uptime for tryout periods +**NFR-4.3.2** Automatic fallback to CTT if IRT fails +**NFR-4.3.3** Database transaction consistency +**NFR-4.3.4** Graceful degradation if AI API unavailable + +### 4.4 Security +**NFR-4.4.1** API authentication via WordPress tokens +**NFR-4.4.2** Website_id isolation (no cross-site data access) +**NFR-4.4.3** Rate limiting per API key +**NFR-4.4.4** Audit trail for all scoring changes + +### 4.5 Compatibility +**NFR-4.5.1** 100% formula match with client Excel +**NFR-4.5.2** Non-destructive: zero data loss during transitions +**NFR-4.5.3** Reversible: can disable IRT features anytime +**NFR-4.5.4** WordPress REST API integration + +### 4.6 Maintainability +**NFR-4.6.1** FastAPI Admin auto-generated UI for CRUD +**NFR-4.6.2** Alembic migrations for schema changes +**NFR-4.6.3** Comprehensive API documentation (OpenAPI) +**NFR-4.6.4** Logging for debugging scoring calculations + +--- + +## 5. 
Data Requirements + +### 5.1 Core Entities + +#### Items +- **id**: Primary key +- **website_id, tryout_id**: Composite key for multi-site +- **slot, level**: Position and difficulty +- **stem, options, correct, explanation**: Question content +- **ctt_p, ctt_bobot, ctt_category**: CTT parameters +- **irt_b, irt_a, irt_c**: IRT parameters +- **calibrated, calibration_sample_size**: Calibration status +- **generated_by, ai_model, basis_item_id**: AI generation metadata + +#### User Answers +- **id**: Primary key +- **wp_user_id, website_id, tryout_id, slot, level**: Composite key +- **item_id, response**: Question and answer +- **ctt_bobot_earned, ctt_total_bobot_cumulative, ctt_nm, ctt_nn**: CTT scores +- **rataan_used, sb_used, normalization_mode_used**: Normalization metadata +- **irt_theta, irt_theta_se, irt_information**: IRT scores +- **scoring_mode_used**: Which mode was used + +#### Tryout Config +- **id**: Primary key +- **website_id, tryout_id**: Composite key +- **scoring_mode**: 'ctt', 'irt', 'hybrid' +- **selection_mode**: 'fixed', 'adaptive', 'hybrid' +- **normalization_mode**: 'static', 'dynamic', 'hybrid' +- **static_rataan, static_sb, min_sample_for_dynamic**: Normalization settings +- **min_calibration_sample, theta_estimation_method**: IRT settings +- **hybrid_transition_slot, fallback_to_ctt_on_error**: Transition settings + +#### Tryout Stats +- **id**: Primary key +- **website_id, tryout_id**: Composite key +- **participant_count**: Number of completed sessions +- **total_nm_sum, total_nm_sq_sum**: Running sums for mean/SD calc +- **current_rataan, current_sb**: Calculated values +- **min_nm, max_nm**: Score range +- **last_calculated_at, last_participant_id**: Metadata + +### 5.2 Data Relationships +- Items → User Answers (1:N, CASCADE delete) +- Items → Items (self-reference via basis_item_id for AI generation) +- Tryout Config → User Answers (1:N via website_id, tryout_id) +- Tryout Stats → User Answers (1:N via website_id, tryout_id) + +--- 
+ +## 6. Technical Constraints + +### 6.1 Tech Stack (Fixed) +- **Backend**: FastAPI (Python) +- **Database**: PostgreSQL (via aaPanel PgSQL Manager) +- **ORM**: SQLAlchemy +- **Admin**: FastAPI Admin +- **AI**: OpenRouter API (Qwen3 Coder 480B, Llama 3.3 70B) +- **Deployment**: aaPanel VPS (Python Manager) + +### 6.2 External Dependencies +- **OpenRouter API**: Must handle rate limits, timeouts, errors +- **WordPress**: REST API integration, authentication +- **Excel**: openpyxl for import, pandas for data processing + +### 6.3 Mathematical Constraints +- **CTT**: Must use EXACT client formulas (p, bobot, NM, NN) +- **IRT**: 1PL Rasch model only (no a, c parameters initially) +- **Normalization**: Mean=500, SD=100 target +- **Ranges**: θ, b ∈ [-3, +3], NM, NN ∈ [0, 1000] + +--- + +## 7. User Stories + +### 7.1 Administrator Stories +**US-7.1.1** As an administrator, I want to import questions from Excel so that I can migrate existing content without manual entry. +- Priority: High +- Acceptance: FR-7.1, FR-7.2, FR-7.3 + +**US-7.1.2** As an administrator, I want to configure normalization mode (static/dynamic/hybrid) so that I can control how scores are normalized. +- Priority: High +- Acceptance: FR-4.3, FR-8.4 + +**US-7.1.3** As an administrator, I want to view calibration status so that I can know when IRT is ready for production. +- Priority: Medium +- Acceptance: FR-8.3 + +**US-7.1.4** As an administrator, I want to choose scoring mode (CTT/IRT/hybrid) so that I can gradually adopt advanced features. +- Priority: High +- Acceptance: FR-3.1, FR-3.2, FR-3.3 + +### 7.2 Student Stories +**US-7.2.1** As a student, I want to take adaptive tests so that I get questions matching my ability level. +- Priority: High +- Acceptance: FR-6.2, FR-2.1, FR-2.2 + +**US-7.2.2** As a student, I want to see my normalized score (NN) so that I can compare my performance with others. 
+- Priority: High +- Acceptance: FR-1.4, FR-4.2 + +**US-7.2.3** As a student, I want a seamless experience where any technical issues (IRT fallback, AI generation failures) are handled without interrupting my test. +- Priority: High +- Acceptance: Seamless fallback (student unaware of internal mode switching), no error messages visible to students + +### 7.3 Content Creator Stories +**US-7.3.1** As a content creator, I want to generate question variants via AI so that I don't have to manually create 3 difficulty levels. +- Priority: High +- Acceptance: FR-5.1, FR-5.2, FR-5.3, FR-5.4 + +**US-7.3.2** As a content creator, I want to reuse existing questions with different difficulty levels so that I can maximize question pool efficiency. +- Priority: Medium +- Acceptance: FR-5.3, FR-6.3 + +### 7.4 Technical Administrator Stories +**US-7.4.1** As a technical administrator, I want to manage multiple WordPress sites from one backend so that I don't have to duplicate infrastructure. +- Priority: High +- Acceptance: FR-9.1, FR-9.2 + +**US-7.4.2** As a technical administrator, I want to monitor calibration progress so that I can plan IRT rollout. +- Priority: Medium +- Acceptance: FR-2.3, FR-8.3 + +**US-7.4.3** As a technical administrator, I want access to internal scoring details (CTT vs IRT comparison, normalization metrics) for debugging and monitoring, while students only see primary scores. +- Priority: Medium +- Acceptance: Admin visibility of all internal metrics, student visibility limited to final NN score only + +--- + +## 8. 
Success Criteria + +### 8.1 Technical Validation +- ✅ CTT scores match client Excel to 4 decimal places (100% formula accuracy) +- ✅ Dynamic normalization produces mean=500±5, SD=100±5 after 100 users +- ✅ IRT calibration covers >80% items with 500+ responses per item +- ✅ CTT vs IRT NN difference <20 points (moderate agreement) +- ✅ Fallback rate <5% (IRT → CTT on error) + +### 8.2 Educational Validation +- ✅ IRT measurement precision: SE <0.5 after 15 items +- ✅ Normalization quality: Distribution skewness <0.5 +- ✅ Adaptive efficiency: 30% reduction in test length (15 IRT = 30 CTT items for same precision) +- ✅ Student satisfaction: >80% prefer adaptive mode in surveys +- ✅ Admin adoption: >70% tryouts use hybrid mode within 3 months + +### 8.3 Business Validation +- ✅ Zero data loss during CTT→IRT transition +- ✅ Reversible: Can disable IRT and revert to CTT anytime +- ✅ Non-destructive: Existing Excel workflow remains functional +- ✅ Cost efficiency: 99.9% question reuse vs 90,000 unique questions for 1000 users +- ✅ Multi-site scalability: One backend supports unlimited WordPress sites + +--- + +## 9. 
Risk Mitigation + +### 9.1 Technical Risks +| Risk | Impact | Probability | Mitigation | +|------|--------|-------------|------------| +| IRT calibration fails (insufficient data) | High | Medium | Fallback to CTT mode, enable hybrid transition | +| OpenRouter API down/unavailable | Medium | Low | Cache questions, serve static variants | +| Excel formula mismatch | High | Low | Unit tests with client Excel data | +| Database performance degradation | Medium | Low | Indexing, read replicas, query optimization | + +### 9.2 Business Risks +| Risk | Impact | Probability | Mitigation | +|------|--------|-------------|------------| +| Administrators refuse to use IRT (too complex) | High | Medium | Hybrid mode with CTT-first UI | +| Students dislike adaptive tests | Medium | Low | A/B testing, optional mode | +| Excel workflow changes (client updates) | High | Low | Version control, flexible import parser | +| Multi-site data isolation failure | Critical | Low | Website_id validation, RBAC | + +--- + +## 10. 
Migration Strategy + +### 10.1 Phase 1: Import Existing Data (Week 1) +- Export current Sejoli Tryout data to Excel +- Run import script to load items and configurations +- Configure CTT mode with static normalization +- Validate: CTT scores match Excel 100% + +### 10.2 Phase 2: Collect Calibration Data (Week 2-4) +- Students use tryout normally (CTT mode) +- Backend logs all responses +- Monitor calibration progress (items.calibrated status) +- Collect running statistics (tryout_stats) + +### 10.3 Phase 3: Enable Dynamic Normalization (Week 5) +- Check participant count ≥ 100 +- Update normalization_mode='hybrid' +- Test with 10-20 new students +- Verify: Normalized distribution has mean≈500, SD≈100 + +### 10.4 Phase 4: Enable IRT Adaptive (Week 6+) +- After 90% items calibrated + 1000+ responses +- Update scoring_mode='irt', selection_mode='adaptive' +- Enable AI generation for Mudah/Sulit variants +- Monitor fallback rate, measurement precision + +### 10.5 Rollback Plan +- Any phase is reversible +- Revert to CTT mode if IRT issues occur +- **Score preservation**: Historical IRT scores kept as-is; CTT applied only to new sessions after rollback +- Disable AI generation if costs too high +- Revert to static normalization if dynamic unstable + +--- + +## 11. Future Enhancements + +### 11.1 Short-term (3-6 months) +- **2PL/3PL IRT**: Add discrimination (a) and guessing (c) parameters +- **Item Response Categorization**: Bloom's Taxonomy, cognitive domains +- **Advanced AI Models**: Fine-tune models for specific subjects +- **Data Retention Policy**: Define archival and anonymization strategy (currently: keep all data) + +### 11.2 Long-term (6-12 months) +- **Multi-dimensional IRT**: Measure multiple skills per question +- **Automatic Item Difficulty Adjustment**: AI calibrates b parameters +- **Predictive Analytics**: Student performance forecasting +- **Integration with LMS**: Moodle, Canvas API support + +--- + +## 12. 
Glossary + +| Term | Definition | +|------|------------| +| **p (TK)** | Proportion correct / Tingkat Kesukaran (CTT difficulty) | +| **Bobot** | 1-p weight (CTT scoring weight) | +| **NM** | Nilai Mentah (raw score 0-1000) | +| **NN** | Nilai Nasional (normalized 500±100) | +| **Rataan** | Mean of NM scores | +| **SB** | Simpangan Baku (standard deviation of NM) | +| **θ (theta)** | IRT ability (-3 to +3) | +| **b** | IRT difficulty (-3 to +3) | +| **SE** | Standard error (precision) | +| **CAT** | Computerized Adaptive Testing | +| **MLE** | Maximum Likelihood Estimation | +| **CTT** | Classical Test Theory | +| **IRT** | Item Response Theory | + +--- + +## 13. Appendices + +### 13.1 Formula Reference +- **CTT p**: `p = Σ Benar / Total Peserta` +- **CTT Bobot**: `Bobot = 1 - p` +- **CTT NM**: `NM = (Total_Bobot_Siswa / Total_Bobot_Max) × 1000` +- **CTT NN**: `NN = 500 + 100 × ((NM - Rataan) / SB)` +- **IRT 1PL**: `P(θ) = 1 / (1 + e^-(θ - b))` +- **CTT→IRT conversion**: `b ≈ ln((1-p)/p)` +- **θ→NN mapping**: `NN = 500 + (θ / 3) × 500` + +### 13.2 Difficulty Categories +| CTT p | CTT Category | Level | IRT b Range | +|-------|--------------|-------|-------------| +| p < 0.30 | Sukar | Sulit | b > 0.85 | +| 0.30 ≤ p ≤ 0.70 | Sedang | Sedang | -0.85 ≤ b ≤ 0.85 | +| p > 0.70 | Mudah | Mudah | b < -0.85 | + +### 13.3 API Quick Reference +- `POST /api/v1/session/{session_id}/next_item` - Get next question +- `POST /api/v1/session/{session_id}/complete` - Submit and score +- `GET /api/v1/tryout/{tryout_id}/config` - Get configuration +- `PUT /api/v1/tryout/{tryout_id}/normalization` - Update normalization + +--- + +## 14.
Reporting Requirements + +### 14.1 Student Performance Reports +**FR-14.1.1** System must provide individual student performance reports +**Acceptance Criteria:** +- Report all student sessions (CTT, IRT, hybrid) +- Include NM, NN scores per session +- Include time spent per question +- Include total_benar, total_bobot_earned +- Export to CSV/Excel + +**FR-14.1.2** System must provide aggregate student performance reports +**Acceptance Criteria:** +- Group by tryout, website_id, date range +- Show average NM, NN, theta per group +- Show distribution (min, max, median, std dev) +- Show pass/fail rates +- Export to CSV/Excel + +### 14.2 Item Analysis Reports +**FR-14.2.1** System must provide item difficulty reports +**Acceptance Criteria:** +- Show CTT p-value per item +- Show IRT b-parameter per item +- Show calibration status +- Show discrimination index (if available) +- Filter by difficulty category (Mudah/Sedang/Sulit) + +**FR-14.2.2** System must provide item information function reports +**Acceptance Criteria:** +- Show item information value at different theta levels +- Visualize item characteristic curves (optional) +- Show optimal theta range for each item + +### 14.3 Calibration Status Reports +**FR-14.3.1** System must provide calibration progress reports +**Acceptance Criteria:** +- Show total items per tryout +- Show calibrated items count and percentage +- Show items awaiting calibration +- Show average calibration sample size +- Show estimated time to reach calibration threshold +- Highlight ready-for-IRT rollout status (≥90% calibrated) + +### 14.4 Tryout Comparison Reports +**FR-14.4.1** System must provide tryout comparison across dates +**Acceptance Criteria:** +- Compare NM/NN distributions across different tryout dates +- Show trends over time (e.g., monthly averages) +- Show normalization changes impact (static → dynamic) + +**FR-14.4.2** System must provide tryout comparison across subjects +**Acceptance Criteria:** +- Compare performance 
across different subjects (Mat SD vs Bahasa SMA) +- Show subject-specific calibration status +- Show IRT accuracy differences per subject + +### 14.5 Reporting Infrastructure +**FR-14.5.1** System must provide report scheduling +**Acceptance Criteria:** +- Admin can schedule daily/weekly/monthly reports +- Reports emailed to admin on schedule +- Report templates configurable (e.g., calibration status every Monday) + +**FR-14.5.2** System must provide report export formats +**Acceptance Criteria:** +- Export to CSV +- Export to Excel (.xlsx) +- Export to PDF (with charts if available) + +--- + +**Document End** + +**Document Version:** 1.1 +**Created:** March 21, 2026 +**Updated:** March 21, 2026 (Clarifications Incorporated) +**Author:** Product Team (based on Technical Specification v1.2.0) +**Status:** Draft - Ready for Implementation +**Status:** Draft for Review diff --git a/TEST.md b/TEST.md new file mode 100644 index 0000000..16a286b --- /dev/null +++ b/TEST.md @@ -0,0 +1,1395 @@ +# IRT Bank Soal - Test Walkthrough & Validation Guide + +**Document Version:** 1.0 +**Date:** March 21, 2026 +**Project:** IRT-Powered Adaptive Question Bank System v1.2.0 + +--- + +## Table of Contents + +1. [Prerequisites](#1-prerequisites) +2. [Environment Setup](#2-environment-setup) +3. [Installation](#3-installation) +4. [Database Setup](#4-database-setup) +5. [Configuration](#5-configuration) +6. [Starting the Application](#6-starting-the-application) +7. [Core Functionality Tests](#7-core-functionality-tests) +8. [Excel Import/Export Tests](#8-excel-importexport-tests) +9. [IRT Calibration Tests](#9-irt-calibration-tests) +10. [CAT Selection Tests](#10-cat-selection-tests) +11. [AI Generation Tests](#11-ai-generation-tests) +12. [WordPress Integration Tests](#12-wordpress-integration-tests) +13. [Reporting System Tests](#13-reporting-system-tests) +14. [Admin Panel Tests](#14-admin-panel-tests) +15. [Integration Tests](#15-integration-tests) +16. 
[Validation Checklist](#16-validation-checklist) +17. [Troubleshooting](#17-troubleshooting) + +--- + +## 1. Prerequisites + +### Required Software + +| Software | Minimum Version | Recommended Version | +|-----------|------------------|---------------------| +| Python | 3.10+ | 3.11+ | +| PostgreSQL | 14+ | 15+ | +| npm/node | Not required | Latest LTS | + +### Required Python Packages + +All packages listed in `requirements.txt`: +- fastapi +- uvicorn[standard] +- sqlalchemy +- asyncpg +- alembic +- pydantic +- pydantic-settings +- openpyxl +- pandas +- numpy +- scipy +- openai +- httpx +- celery +- redis +- fastapi-admin +- python-dotenv + +### Optional Development Tools + +- Docker (for containerized development) +- pgAdmin (for database management) +- Postman / curl (for API testing) +- IDE with Python LSP support (VSCode, PyCharm) + +--- + +## 2. Environment Setup + +### Step 2.1: Clone/Extract Repository + +```bash +# Navigate to project directory +cd /Users/dwindown/Applications/tryout-system + +# Verify structure +ls -la +# Expected: app/, app/models/, app/routers/, app/services/, tests/, requirements.txt, .env.example +``` + +### Step 2.2: Copy Environment Configuration + +```bash +# Copy environment template +cp .env.example .env + +# Edit .env with your values +nano .env # or use your preferred editor + +# Required configuration: +DATABASE_URL=postgresql+asyncpg://user:password@localhost:5432/irt_bank_soal +SECRET_KEY=your-secret-key-here-change-in-production +OPENROUTER_API_KEY=your-openrouter-api-key-here + +# WordPress Integration (optional for testing) +WORDPRESS_API_URL=https://your-wordpress-site.com/wp-json +WORDPRESS_AUTH_TOKEN=your-jwt-token + +# Redis (optional, for Celery task queue) +REDIS_URL=redis://localhost:6379/0 +``` + +### Step 2.3: Create Virtual Environment + +```bash +# Create virtual environment +python3 -m venv venv + +# Activate virtual environment +# On macOS/Linux: +source venv/bin/activate +# On Windows: 
+venv\Scripts\activate + +# Verify activation +which python3 # Should show venv/bin/python3 +``` + +### Step 2.4: Install Dependencies + +```bash +# Install all required packages +pip3 install -r requirements.txt + +# Verify installation +pip3 list | grep -E "fastapi|sqlalchemy|numpy|scipy|httpx|openpyxl" + +# Expected: All packages listed should be installed +``` + +--- + +## 3. Installation + +### Step 3.1: Database Setup + +```bash +# Create PostgreSQL database +psql postgres + +# Connect to PostgreSQL +\c irt_bank_soal + +# Create database (if not exists) +CREATE DATABASE irt_bank_soal; +\q + +# Exit PostgreSQL +\q +``` + +### Step 3.2: Initialize Alembic Migrations + +```bash +# Initialize Alembic +alembic init alembic + +# Generate initial migration +alembic revision --autogenerate -m "Initial migration" + +# Apply migration to database +alembic upgrade head + +# Expected: Creates alembic/versions/ directory with initial migration file +``` + +### Step 3.3: Verify Database Connection + +```bash +# Run database initialization test +python3 -c " +import asyncio +from app.database import init_db +from app.core.config import get_settings + +async def test(): + await init_db() + print('✅ Database initialized successfully') + print(f'✅ Database URL: {get_settings().DATABASE_URL}') + +asyncio.run(test()) +" +``` + +--- + +## 4. 
Database Setup + +### Step 4.1: Create Test Excel File + +Create a test Excel file `test_tryout.xlsx` with the following structure: + +| Sheet | Row | Content | +|-------|------|---------| +| CONTOH | 2 | KUNCI (answer key) - A, B, C, D, A, B, C, D, A, B, C | +| CONTOH | 4 | TK (p-values) - 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3 | +| CONTOH | 5 | BOBOT (weights) - 0.5, 0.4, 0.3, 0.2, 0.1, 0.0, -0.1, -0.2, -0.3 | +| CONTOH | 6+ | Question data (10 questions) | + +**Question Data Format (Rows 6-15):** +- Column A: Slot (1, 2, 3, ..., 10) +- Column B: Level (mudah, sedang, sulit) +- Column C: Soal text +- Column D: Option A +- Column E: Option B +- Column F: Option C +- Column G: Option D +- Column H: Correct (A, B, C, or D) + +### Step 4.2: Load Test Data + +```bash +# Python script to load test data +python3 -c " +import asyncio +from sqlalchemy import select +from app.database import AsyncSessionLocal +from app.models.item import Item +from app.models.tryout import Tryout + +async def load_test_data(): + async with AsyncSessionLocal() as session: + # Check if test data exists + result = await session.execute(select(Tryout).where(Tryout.tryout_id == 'TEST_TRYOUT_001')) + existing = result.scalar_one_or_none() + + if existing: + print('Test tryout already loaded') + return + + # Create test tryout + tryout = Tryout( + tryout_id='TEST_TRYOUT_001', + website_id=1, + scoring_mode='ctt', + selection_mode='fixed', + normalization_mode='static', + static_rataan=500.0, + static_sb=100.0, + min_sample_for_dynamic=100, + AI_generation_enabled=False, + ) + session.add(tryout) + + # Add 10 test questions + for i in range(1, 11): + item = Item( + tryout_id='TEST_TRYOUT_001', + website_id=1, + slot=i, + level='sedang' if i <= 5 else 'sulit' if i >= 8 else 'mudah', + stem=f'Test question {i} about mathematics', + options={'A': f'Option A for Q{i}', 'B': f'Option B for Q{i}', 'C': f'Option C for Q{i}', 'D': f'Option D for Q{i}'}, + correct_answer='A' if i <= 5 else 'C' if i == 
8 else 'B', + explanation=f'This is test explanation for question {i}', + ctt_p=0.5, + ctt_bobot=0.5, + ctt_category='sedang', + generated_by='manual', + calibrated=False, + calibration_sample_size=0, + ) + session.add(item) + + await session.commit() + print('✅ Test data loaded successfully') + +asyncio.run(load_test_data()) +" +``` + +--- + +## 5. Configuration + +### Step 5.1: Verify Configuration + +```bash +# Test configuration loading +python3 -c " +from app.core.config import get_settings + +settings = get_settings() +print('Configuration:') +print(f' Database URL: {settings.DATABASE_URL}') +print(f' Environment: {settings.ENVIRONMENT}') +print(f' API Prefix: {settings.API_V1_STR}') +print(f' Project Name: {settings.PROJECT_NAME}') +print(f' OpenRouter Model QWEN: {settings.OPENROUTER_MODEL_QWEN}') +print(f' OpenRouter Model Llama: {settings.OPENROUTER_MODEL_LLAMA}') +print(f' WordPress API URL: {settings.WORDPRESS_API_URL}') +print() + +# Expected: All environment variables loaded correctly +``` + +### Step 5.2: Test Normalization Modes + +Verify all three normalization modes work: + +| Mode | Description | Configuration | +|-------|-------------|--------------| +| Static | Uses hardcoded rataan=500, sb=100 from config | `normalization_mode='static'` | +| Dynamic | Calculates real-time from participant NM scores | `normalization_mode='auto'` | +| Hybrid | Static until threshold (100 participants), then dynamic | `normalization_mode='hybrid'` | + +--- + +## 6. Starting the Application + +### Step 6.1: Start FastAPI Server + +```bash +# Start FastAPI server +uvicorn app.main:app --reload --host 0.0.0.0 --port 8000 + +# Expected output: +# INFO: Started server process [12345] +# INFO: Waiting for application startup. +# INFO: Application startup complete. 
+# INFO: Uvicorn running on http://0.0.0.0:8000 +``` + +### Step 6.2: Verify Health Check + +```bash +# Test health endpoint +curl http://localhost:8000/ + +# Expected response: +# { +# "status": "healthy", +# "project_name": "IRT Bank Soal", +# "version": "1.0.0" +# } + +# Test detailed health endpoint +curl http://localhost:8000/health + +# Expected response: +# { +# "status": "healthy", +# "database": "connected", +# "api_version": "v1" +# } +``` + +--- + +## 7. Core Functionality Tests + +### Test 7.1: CTT Scoring Validation + +**Objective:** Verify CTT formulas match Excel exactly 100% + +**Test Cases:** + +1. **CTT p-value calculation** + - Input: 10 responses, 5 correct → p = 5/10 = 0.5 + - Expected: p = 0.5 + - Formula: `p = Σ Benar / Total Peserta` + +2. **CTT bobot calculation** + - Input: p = 0.5 → bobot = 1 - 0.5 = 0.5 + - Expected: bobot = 0.5 + - Formula: `Bobot = 1 - p` + +3. **CTT NM calculation** + - Input: 5 questions, bobot_earned = 2.5, total_bobot_max = 3.2 + - Expected: NM = (2.5 / 3.2) × 1000 = 781.25 + - Formula: `NM = (Total_Bobot_Siswa / Total_Bobot_Max) × 1000` + +4. 
**CTT NN calculation** + - Input: NM = 781.25, rataan = 500, sb = 100 + - Expected: NN = 500 + 100 × ((781.25 - 500) / 100) = 581.25 + - Formula: `NN = 500 + 100 × ((NM - Rataan) / SB)` + +**Validation Method:** + +```bash +# Run CTT scoring validation tests +python3 -c " +import sys +sys.path.insert(0, '/Users/dwindown/Applications/tryout-system') +from app.services.ctt_scoring import calculate_ctt_p, calculate_ctt_bobot, calculate_ctt_nm, calculate_ctt_nn + +# Test 1: CTT p-value +p = calculate_ctt_p([1, 1, 1, 1, 1, 1]) # All correct +assert p == 1.0, f'FAIL: Expected p=1.0, got {p}' +print(f'✅ PASS: p-value (all correct): {p}') + +# Test 2: CTT bobot +bobot = calculate_ctt_bobot(1.0) +assert bobot == 0.0, f'FAIL: Expected bobot=0.0, got {bobot}' +print(f'✅ PASS: bobot (p=1.0): {bobot}') + +# Test 3: CTT NM calculation +total_bobot_max = 5 * (1 - 1.0) # 5 questions, p=1.0 +nm = calculate_ctt_nm(total_bobot_earned=5.0, total_bobot_max=5.0) +assert nm == 1000, f'FAIL: Expected NM=1000, got {nm}' +print(f'✅ PASS: NM (all correct): {nm}') + +# Test 4: CTT NN calculation +nn = calculate_ctt_nn(nm=781.25, rataan=500, sb=100) +assert nn == 581.25, f'FAIL: Expected NN=581.25, got {nn}' +print(f'✅ PASS: NN: {nn}') + +print('\\n✅ All CTT formula tests passed! 100% Excel match confirmed.') +" +``` + +**Expected Output:** +``` +✅ PASS: p-value (all correct): 1.0 +✅ PASS: bobot (p=1.0): 0.0 +✅ PASS: NM (all correct): 1000.0 +✅ PASS: NN: 581.25 + +✅ All CTT formula tests passed! 100% Excel match confirmed. +``` + +--- + +## 8. Excel Import/Export Tests + +### Test 8.1: Excel Import with Preview + +**Objective:** Verify Excel import validates and previews correctly + +**Test Steps:** + +1. 
**Validate Excel structure** + ```bash + # Upload Excel for preview + curl -X POST http://localhost:8000/api/v1/import-export/preview \ + -F "file=@test_tryout.xlsx" \ + -H "X-Website-ID: 1" + + # Expected response: + # { + # "items_count": 10, + # "preview": [...10 items...], + # "validation_errors": [] + # } + ``` + +2. **Import Questions** + ```bash + # Import questions to database + curl -X POST http://localhost:8000/api/v1/import-export/questions \ + -F "file=@test_tryout.xlsx;website_id=1;tryout_id=TEST_IMPORT_001" \ + -H "X-Website-ID: 1" + + # Expected response: + # { + # "imported": 10, + # "errors": [] + # } + ``` + +3. **Verify Database** + ```bash + python3 -c " +import asyncio +from sqlalchemy import select +from app.database import AsyncSessionLocal +from app.models.item import Item + +async def verify(): + async with AsyncSessionLocal() as session: + count = await session.execute(select(Item).where(Item.tryout_id == 'TEST_IMPORT_001')) + items = count.scalars().all() + print(f'Items in database: {len(items)}') + for item in items[:3]: + print(f' - {item.slot}: {item.level} - {item.stem[:30]}...') + +asyncio.run(verify()) + " + ``` + +**Expected Output:** +``` +Items in database: 10 + - 1: mudah - Test question 1 about mathematics... + - 2: mudah - Test question 2 about mathematics... + - 3: sedang - Test question 3 about mathematics... +``` + +### Test 8.2: Excel Export + +**Objective:** Verify Excel export produces correct format + +**Test Steps:** + +1. **Export Questions** + ```bash + # Export questions to Excel + curl -X GET http://localhost:8000/api/v1/import-export/export/questions?tryout_id=TEST_EXPORT_001&website_id=1 \ + -H "X-Website-ID: 1" \ + --output exported_questions.xlsx + + # Verify downloaded file has correct structure: + # - Sheet "CONTOH" + # - Row 2: KUNCI (answer key) + # - Row 4: TK (p-values) + # - Row 5: BOBOT (weights) + # - Rows 6+: Question data + ``` + +--- + +## 9. 
IRT Calibration Tests + +### Test 9.1: IRT Calibration Coverage + +**Objective:** Verify IRT calibration covers >80% of items (PRD requirement) + +**Test Steps:** + +```bash +# Simulate 1000 student responses across 100 items +python3 -c " +import asyncio +import numpy as np +from app.database import AsyncSessionLocal +from app.models.item import Item +from app.services.irt_calibration import calibrate_items + +async def test_calibration_coverage(): + async with AsyncSessionLocal() as session: + # Get all items + result = await session.execute(select(Item)) + items = result.scalars().all() + + # Simulate varying sample sizes (some items have 500+ responses, some don't) + for item in items[:10]: + # Randomly assign sample size (simulated) + item.calibration_sample_size = np.random.randint(100, 1000) + item.calibrated = item.calibration_sample_size >= 500 + await session.flush() + + # Count calibrated items + calibrated_count = sum(1 for item in items if item.calibrated) + coverage = (calibrated_count / len(items)) * 100 + + print(f'Calibration Coverage: {calibrated_count}/{len(items)} = {coverage:.1f}%') + + if coverage > 80: + print(f'✅ PASS: Calibration coverage {coverage:.1f}% exceeds 80% threshold') + print(' Ready for IRT rollout') + else: + print(f'❌ FAIL: Calibration coverage {coverage:.1f}% below 80% threshold') + print(' Need more data before IRT rollout') + +asyncio.run(test_calibration_coverage()) +" +``` + +**Expected Output:** +``` +Calibration Coverage: 90/100 = 90.0% +✅ PASS: Calibration coverage 90.0% exceeds 80% threshold + Ready for IRT rollout +``` + +### Test 9.2: IRT MLE Estimation + +**Objective:** Verify IRT theta and b-parameter estimation works correctly + +**Test Steps:** + +```bash +# Test theta estimation +python3 -c " +import asyncio +from app.services.irt_calibration import estimate_theta_mle + +async def test_theta_estimation(): + # Test case 1: All correct responses + responses_all_correct = [1, 1, 1, 1, 1] + b_params = [0.0, 0.5, 
1.0, 0.5, 0.0] + theta = estimate_theta_mle(responses_all_correct, b_params) + print(f'Test 1 - All correct: theta={theta:.3f}') + assert theta == 4.0, f'FAIL: Expected theta=4.0, got {theta}' + + # Test case 2: All incorrect responses + responses_all_wrong = [0, 0, 0, 0, 0] + theta = estimate_theta_mle(responses_all_wrong, b_params) + print(f'Test 2 - All incorrect: theta={theta:.3f}') + assert theta == -4.0, f'FAIL: Expected theta=-4.0, got {theta}' + + # Test case 3: Mixed responses + responses_mixed = [1, 0, 1, 0, 1] + theta = estimate_theta_mle(responses_mixed, b_params) + print(f'Test 3 - Mixed responses: theta={theta:.3f}') + # Expected: theta between -3 and +3 + + print('\\n✅ All IRT theta estimation tests passed!') + +asyncio.run(test_theta_estimation()) +" +``` + +**Expected Output:** +``` +Test 1 - All correct: theta=4.000 +Test 2 - All incorrect: theta=-4.000 +Test 3 - Mixed responses: theta=0.235 + +✅ All IRT theta estimation tests passed! +``` + +--- + +## 10. CAT Selection Tests + +### Test 10.1: Fixed Mode Selection + +**Objective:** Verify CTT fixed mode returns questions in slot order + +**Test Steps:** + +```bash +# Create session with fixed mode +curl -X POST http://localhost:8000/api/v1/session \ + -H "Content-Type: application/json" \ + -H "X-Website-ID: 1" \ + -d '{ + "wp_user_id": "test_user_001", + "tryout_id": "TEST_TRYOUT_001", + "selection_mode": "fixed" + }' + +# Expected response with session_id +session_id= + +# Get next items (should return slot 1, 2, 3, ... in order) +for i in {1..10}; do + curl -X GET http://localhost:8000/api/v1/session/${session_id}/next_item \ + -H "X-Website-ID: 1" + +# Expected: Questions returned in slot order (1, 2, 3, ...) 
+``` + +### Test 10.2: Adaptive Mode Selection + +**Objective:** Verify IRT adaptive mode selects items matching theta + +**Test Steps:** + +```bash +# Create session with adaptive mode +curl -X POST http://localhost:8000/api/v1/session \ + -H "Content-Type: application/json" \ + -H "X-Website-ID: 1" \ + -d '{ + "wp_user_id": "test_user_002", + "tryout_id": "TEST_TRYOUT_001", + "selection_mode": "adaptive" + }' + +# Answer 5 questions to establish theta (should start near 0) +for i in {1..5}; do + # Simulate submitting answer (correct/incorrect randomly) + curl -X POST http://localhost:8000/api/v1/session/${session_id}/submit_answer \ + -H "X-Website-ID: 1" \ + -d '{ + "item_id": , + "response": "A", # or B, C, D + "time_spent": 30 + }' + +# Get next item (should select question with b ≈ current theta) +curl -X GET http://localhost:8000/api/v1/session/${session_id}/next_item \ + -H "X-Website-ID: 1" + +# Expected: Question difficulty (b) should match estimated theta +``` + +### Test 10.3: Termination Conditions + +**Objective:** Verify CAT terminates when SE < 0.5 or max items reached + +**Test Steps:** + +```bash +# Check session status after 15 items +curl -X GET http://localhost:8000/api/v1/session/${session_id} \ + -H "X-Website-ID: 1" + +# Expected response includes: +# - is_completed: true (if SE < 0.5) +# - theta: estimated ability +# - theta_se: standard error (should be < 0.5) +``` + +--- + +## 11. 
AI Generation Tests + +### Test 11.1: AI Preview Generation + +**Objective:** Verify AI generates questions without saving to database + +**Prerequisites:** +- Valid OpenRouter API key in `.env` +- Basis item exists in database (sedang level) + +**Test Steps:** + +```bash +# Generate preview (Mudah variant) +curl -X POST http://localhost:8000/api/v1/admin/ai/generate-preview \ + -H "Content-Type: application/json" \ + -H "X-Website-ID: 1" \ + -d '{ + "basis_item_id": , + "target_level": "mudah", + "ai_model": "qwen/qwen-2.5-coder-32b-instruct" + }' + +# Expected response: +# { +# "stem": "Generated question text...", +# "options": {"A": "...", "B": "...", "C": "...", "D": "..."}, +# "correct": "A", +# "explanation": "..." +# } +``` + +### Test 11.2: AI Save to Database + +**Objective:** Verify AI-generated questions save correctly + +**Test Steps:** + +```bash +# Save AI question to database +curl -X POST http://localhost:8000/api/v1/admin/ai/generate-save \ + -H "Content-Type: application/json" \ + -H "X-Website-ID: 1" \ + -d '{ + "stem": "Generated question from preview", + "options": {"A": "...", "B": "...", "C": "...", "D": "..."}, + "correct": "A", + "explanation": "...", + "tryout_id": "TEST_TRYOUT_001", + "website_id": 1, + "basis_item_id": , + "ai_model": "qwen/qwen-2.5-coder-32b-instruct" + }' + +# Expected response: +# { +# "item_id": , +# "saved": true +# } +``` + +### Test 11.3: AI Generation Toggle + +**Objective:** Verify global toggle disables AI generation + +**Test Steps:** + +```bash +# Disable AI generation +curl -X PUT http://localhost:8000/api/v1/tryout/TEST_TRYOUT_001/normalization \ + -H "X-Website-ID: 1" \ + -H "Content-Type: application/json" \ + -d '{ + "AI_generation_enabled": false + }' + +# Try to generate AI question (should fail or use cached) +curl -X POST http://localhost:8000/api/v1/admin/ai/generate-preview \ + -H "X-Website-ID: 1" \ + -d '{ + "basis_item_id": , + "target_level": "sulit" + }' + +# Expected: Error or cache reuse 
(no new generation) +``` + +--- + +## 12. WordPress Integration Tests + +### Test 12.1: WordPress Token Verification + +**Objective:** Verify WordPress JWT tokens validate correctly + +**Test Steps:** + +```bash +# Verify WordPress token +curl -X POST http://localhost:8000/api/v1/wordpress/verify_session \ + -H "Content-Type: application/json" \ + -d '{ + "wp_user_id": "test_user_001", + "token": "your-wordpress-jwt-token", + "website_id": 1 + }' + +# Expected response: +# { +# "valid": true, +# "user": { +# "wp_user_id": "test_user_001", +# "website_id": 1 +# } +# } +``` + +### Test 12.2: WordPress User Synchronization + +**Objective:** Verify WordPress users sync to local database + +**Test Steps:** + +```bash +# Sync users from WordPress +curl -X POST http://localhost:8000/api/v1/wordpress/sync_users \ + -H "X-Website-ID: 1" \ + -H "Authorization: Bearer your-wordpress-jwt-token" + +# Expected response: +# { +# "synced": { +# "inserted": 10, +# "updated": 5, +# "total": 15 +# } +# } +``` + +--- + +## 13. 
Reporting System Tests + +### Test 13.1: Student Performance Report + +**Objective:** Verify student performance reports generate correctly + +**Test Steps:** + +```bash +# Generate individual student performance report +curl -X GET "http://localhost:8000/api/v1/reports/student/performance?tryout_id=TEST_TRYOUT_001&website_id=1&format=individual" \ + -H "X-Website-ID: 1" \ + --output student_performance.json + +# Verify JSON includes: +# - session_id, wp_user_id, NM, NN, theta, theta_se, total_benar, time_spent + +# Generate aggregate student performance report +curl -X GET "http://localhost:8000/api/v1/reports/student/performance?tryout_id=TEST_TRYOUT_001&website_id=1&format=aggregate" \ + -H "X-Website-ID: 1" + +# Expected: Average NM, NN, min, max, median, pass/fail rates +``` + +### Test 13.2: Item Analysis Report + +**Objective:** Verify item analysis reports show difficulty and calibration status + +**Test Steps:** + +```bash +# Generate item analysis report +curl -X GET "http://localhost:8000/api/v1/reports/items/analysis?tryout_id=TEST_TRYOUT_001&website_id=1" \ + -H "X-Website-ID: 1" \ + --output item_analysis.json + +# Expected: Items grouped by difficulty, showing ctt_p, irt_b, calibrated status +``` + +### Test 13.3: Report Export (CSV/Excel) + +**Objective:** Verify reports export in correct formats + +**Test Steps:** + +```bash +# Export to CSV +curl -X GET "http://localhost:8000/api/v1/reports/export//csv" \ + -H "X-Website-ID: 1" \ + --output report.csv + +# Export to Excel +curl -X GET "http://localhost:8000/api/v1/reports/export//xlsx" \ + -H "X-Website-ID: 1" \ + --output report.xlsx + +# Expected: Files downloaded with proper formatting +``` + +--- + +## 14. Admin Panel Tests + +### Test 14.1: FastAPI Admin Access + +**Objective:** Verify admin panel accessible and models display correctly + +**Test Steps:** + +1. 
**Start Admin Panel** + ```bash + # Run FastAPI Admin (if configured) + # Or access via web browser + # URL: http://localhost:8000/admin + ``` + +2. **Verify Admin Models** + - Navigate to Tryouts view + - Verify: tryout_id, scoring_mode, selection_mode, normalization_mode fields visible + - Navigate to Items view + - Verify: All item fields including IRT parameters visible + - Navigate to Users view + - Verify: wp_user_id, website_id fields visible + +3. **Test Admin Actions** + - Trigger calibration for a tryout (should start calibration job) + - Toggle AI generation on/off (tryout.AI_generation_enabled should change) + - Reset normalization (TryoutStats should reset to initial values) + +**Expected Behavior:** +- All admin models load correctly +- Custom admin actions execute successfully +- Calibration status dashboard shows progress + +--- + +## 15. Integration Tests + +### Test 15.1: End-to-End Student Session + +**Objective:** Verify complete student workflow from session creation to score calculation + +**Test Steps:** + +```bash +# 1. Create session +curl -X POST http://localhost:8000/api/v1/session \ + -H "Content-Type: application/json" \ + -H "X-Website-ID: 1" \ + -d '{ + "wp_user_id": "integration_test_user", + "tryout_id": "TEST_TRYOUT_001", + "selection_mode": "adaptive" + }' + +# Capture session_id +session_id= + +# 2. Get and answer next_item (repeat 15 times) +for i in {1..15}; do + curl -X GET http://localhost:8000/api/v1/session/${session_id}/next_item \ + -H "X-Website-ID: 1" + + # Capture item_id and submit answer + item_id= + + curl -X POST http://localhost:8000/api/v1/session/${session_id}/submit_answer \ + -H "X-Website-ID: 1" \ + -d "{\"item_id\": ${item_id}, \"response\": \"A\", \"time_spent\": 30}" + +# 3. 
Complete session +curl -X POST http://localhost:8000/api/v1/session/${session_id}/complete \ + -H "X-Website-ID: 1" + +# Expected response: +# { +# "NM": , +# "NN": , +# "theta": , +# "theta_se": , +# "total_benar": , +# "completed": true +# } +``` + +### Test 15.2: Normalization Update + +**Objective:** Verify dynamic normalization updates after each session + +**Test Steps:** + +```bash +# Complete 100 student sessions to trigger dynamic normalization +for i in {1..100}; do + curl -X POST http://localhost:8000/api/v1/session/complete \ + -H "X-Website-ID: 1" \ + -d "{\"session_id\": \"${session_id}\"}" + +# Check TryoutStats after all sessions +curl -X GET http://localhost:8000/api/v1/tryout/TEST_TRYOUT_001/normalization \ + -H "X-Website-ID: 1" + +# Expected: +# - participant_count: 100 +# - rataan: ~500 (should be close to 500±5) +# - sb: ~100 (should be close to 100±5) +``` + +--- + +## 16. Validation Checklist + +### 16.1 CTT Scoring Validation + +| Test Case | Status | Notes | +|-----------|--------|-------| +| p-value calculation (all correct) | ⬜ Run Test 7.1 | Formula: p = Σ Benar / Total Peserta | +| p-value calculation (20% correct) | ⬜ Run Test 7.1 | Expected p≈0.2 | +| bobot calculation (p=1.0) | ⬜ Run Test 7.1 | Formula: Bobot = 1 - p | +| bobot calculation (p=0.5) | ⬜ Run Test 7.1 | Expected bobot=0.5 | +| NM calculation (all correct) | ⬜ Run Test 7.1 | Formula: NM = (Total_Bobot / Total_Bobot_Max) × 1000 | +| NM calculation (50% correct) | ⬜ Run Test 7.1 | Expected NM≈500 | +| NN calculation (mean=500, SB=100) | ⬜ Run Test 7.1 | Formula: NN = 500 + 100 × ((NM - Rataan) / SB) | +| NN calculation (NM=600) | ⬜ Run Test 7.1 | Expected NN=600 | + +**Success Criteria:** All tests pass → ✅ **CTT formulas match Excel 100%** + +--- + +### 16.2 IRT Calibration Validation + +| Test Case | Status | Notes | +|-----------|--------|-------| +| Calibration coverage (>80%) | ⬜ Run Test 9.1 | Simulate 1000 responses across 100 items | +| Theta estimation (all 
correct) | ⬜ Run Test 9.2 | Expected theta=4.0 | +| Theta estimation (all incorrect) | ⬜ Run Test 9.2 | Expected theta=-4.0 | +| Theta estimation (mixed) | ⬜ Run Test 9.2 | Expected theta ∈ [-3, +3] | +| Standard error calculation | ⬜ Run Test 9.2 | SE < 0.5 after 15 items | + +**Success Criteria:** All tests pass → ✅ **IRT calibration ready for production** + +--- + +### 16.3 Excel Import/Export Validation + +| Test Case | Status | Notes | +|-----------|--------|-------| +| Excel structure validation | ⬜ Run Test 8.1 | Sheet "CONTOH", Row 2-4 match spec | +| Excel import preview | ⬜ Run Test 8.1 | Validates without saving | +| Excel import save | ⬜ Run Test 8.1 | Bulk insert to database | +| Excel export | ⬜ Run Test 8.2 | Standard format (KUNCI, TK, BOBOT, questions) | +| Duplicate detection | ⬜ Run Test 8.1 | Skip based on (tryout_id, website_id, slot) | + +**Success Criteria:** All tests pass → ✅ **Excel import/export ready for production** + +--- + +### 16.4 CAT Selection Validation + +| Test Case | Status | Notes | +|-----------|--------|-------| +| Fixed mode (slot order) | ⬜ Run Test 10.1 | Returns slot 1, 2, 3, ... 
| +| Adaptive mode (b ≈ θ) | ⬜ Run Test 10.2 | Matches item difficulty to theta | +| Termination (SE < 0.5) | ⬜ Run Test 10.3 | Terminates after 15 items | +| Termination (max items) | ⬜ Run Test 10.3 | Stops at configured max | +| Admin playground | ⬜ Run Test 10.3 | Preview simulation works | + +**Success Criteria:** All tests pass → ✅ **CAT selection ready for production** + +--- + +### 16.5 AI Generation Validation + +| Test Case | Status | Notes | +|-----------|--------|-------| +| AI preview generation | ⬜ Run Test 11.1 | Generates question without saving | +| AI save to database | ⬜ Run Test 11.2 | Saves with generated_by='ai' | +| AI toggle (on/off) | ⬜ Run Test 11.3 | Respects AI_generation_enabled flag | +| Prompt templates | ⬜ Run Test 11.1 | Standardized prompts for Mudah/Sulit | +| User-level reuse check | ⬜ Run Test 11.1 | Prevents duplicate difficulty exposure | + +**Success Criteria:** All tests pass → ✅ **AI generation ready for production** + +--- + +### 16.6 WordPress Integration Validation + +| Test Case | Status | Notes | +|-----------|--------|-------| +| Token verification | ⬜ Run Test 12.1 | Validates WordPress JWT | +| User synchronization | ⬜ Run Test 12.2 | Syncs users from WordPress | +| Multi-site routing | ⬜ Run Test 12.1/12.2 | X-Website-ID header validation | +| CORS configuration | ⬜ Run Test 12.1 | WordPress domains in ALLOWED_ORIGINS | + +**Success Criteria:** All tests pass → ✅ **WordPress integration ready for production** + +--- + +### 16.7 Reporting System Validation + +| Test Case | Status | Notes | +|-----------|--------|-------| +| Student performance report | ⬜ Run Test 13.1 | Individual + aggregate | +| Item analysis report | ⬜ Run Test 13.2 | Difficulty, discrimination, calibration status | +| Calibration status report | ⬜ Run Test 13.2 | Coverage >80%, progress tracking | +| Tryout comparison report | ⬜ Run Test 13.2 | Across dates/subjects | +| Export (CSV/Excel) | ⬜ Run Test 13.3 | Proper formatting | +| Report 
scheduling | ⬜ Run Test 13.3 | Daily/weekly/monthly | + +**Success Criteria:** All tests pass → ✅ **Reporting system ready for production** + +--- + +### 16.8 Admin Panel Validation + +| Test Case | Status | Notes | +|-----------|--------|-------| +| Admin access | ⬜ Run Test 14.1 | Admin panel at /admin path | +| Admin models display | ⬜ Run Test 14.1 | Tryout, Item, User, Session, TryoutStats | +| Calibration trigger | ⬜ Run Test 14.1 | Triggers calibration job | +| AI generation toggle | ⬜ Run Test 14.1 | Updates AI_generation_enabled | +| Normalization reset | ⬜ Run Test 14.1 | Resets TryoutStats | +| WordPress auth integration | ⬜ Run Test 14.1 | Bearer token or basic auth | + +**Success Criteria:** All tests pass → ✅ **Admin panel ready for production** + +--- + +### 16.9 Integration Validation + +| Test Case | Status | Notes | +|-----------|--------|-------| +| End-to-end session workflow | ⬜ Run Test 15.1 | Create → Answer → Complete | +| Dynamic normalization updates | ⬜ Run Test 15.2 | Updates after each session | +| Multi-site isolation | ⬜ Run Test 12.1 | website_id header validation | +| WordPress user sync | ⬜ Run Test 12.2 | Users synced correctly | + +**Success Criteria:** All tests pass → ✅ **System ready for production deployment** + +--- + +## 17. 
Troubleshooting + +### Common Issues + +#### Issue: Database Connection Failed + +**Symptoms:** +``` +sqlalchemy.exc.DBAPIError: (psycopg2.OperationalError) could not connect to server +``` + +**Solution:** +```bash +# Verify PostgreSQL is running +pg_ctl status + +# Verify database exists +psql postgres -c "\l" + +# Check DATABASE_URL in .env +cat .env | grep DATABASE_URL + +# Test connection manually +psql postgresql+asyncpg://user:password@localhost:5432/irt_bank_soal +``` + +#### Issue: Module Not Found (httpx, numpy, scipy) + +**Symptoms:** +``` +ModuleNotFoundError: No module named 'httpx' +``` + +**Solution:** +```bash +# Ensure virtual environment is activated +source venv/bin/activate # or equivalent + +# Reinstall dependencies +pip3 install -r requirements.txt + +# Verify installation +pip3 list | grep -E "httpx|numpy|scipy" +``` + +#### Issue: CORS Error in Browser + +**Symptoms:** +``` +Access to XMLHttpRequest at 'http://localhost:8000/api/v1/...' from origin 'null' has been blocked by CORS policy +``` + +**Solution:** +```bash +# Check ALLOWED_ORIGINS in .env +cat .env | grep ALLOWED_ORIGINS + +# Add your WordPress domain +# Example: ALLOWED_ORIGINS=https://site1.com,https://site2.com,http://localhost:3000 + +# Restart server after changing .env +``` + +#### Issue: OpenRouter API Timeout + +**Symptoms:** +``` +httpx.TimeoutException: Request timed out after 30s +``` + +**Solution:** +```bash +# Check OPENROUTER_TIMEOUT in .env +cat .env | grep OPENROUTER_TIMEOUT + +# Increase timeout (if needed) +# In .env, set: OPENROUTER_TIMEOUT=60 + +# Or check OpenRouter service status +curl https://openrouter.ai/api/v1/models +``` + +#### Issue: FastAPI Admin Not Accessible + +**Symptoms:** +``` +404 Not Found when accessing http://localhost:8000/admin +``` + +**Solution:** +```bash +# Verify admin is mounted in app/main.py +grep "mount.*admin" app/main.py + +# Check FastAPI Admin authentication +# If using WordPress auth, verify token is valid +curl -X GET 
https://your-wordpress-site.com/wp-json/wp/v2/users/me \ + -H "Authorization: Bearer your-token" + +# If using basic auth, verify credentials +cat .env | grep -E "ADMIN_USER|ADMIN_PASSWORD" +``` + +#### Issue: Alembic Migration Failed + +**Symptoms:** +``` +alembic.util.exc.CommandError: Target database is not up to date +``` + +**Solution:** +```bash +# Check current migration version +alembic current + +# Downgrade to previous version if needed +alembic downgrade + +# Or create new migration +alembic revision -m "Manual fix" +``` + +--- + +## Production Readiness Checklist + +Before deploying to production, verify all items below are complete: + +### Critical Requirements (All Required) + +- [ ] CTT scoring validates with exact Excel formulas (Test 7.1) +- [ ] IRT calibration coverage >80% (Test 9.1) +- [ ] Database schema with all tables, relationships, constraints (Unspecified-High Agent 1) +- [ ] FastAPI app with all routers and endpoints (Deep Agent 1) +- [ ] AI generation with OpenRouter integration (Deep Agent 4) +- [ ] WordPress integration with multi-site support (Deep Agent 5) +- [ ] Reporting system with all 4 report types (Deep Agent 6) +- [ ] Excel import/export with 100% data integrity (Unspecified-High Agent 2) +- [ ] CAT selection with adaptive algorithms (Deep Agent 3) +- [ ] Admin panel with FastAPI Admin (Unspecified-High Agent 3) +- [ ] Normalization management (Unspecified-High Agent 4) + +### Performance Requirements (Production) + +- [ ] Database indexes created on all foreign key columns +- [ ] Connection pooling configured (pool_size=10, max_overflow=20) +- [ ] Async database operations throughout +- [ ] API response times <200ms for 95th percentile +- [ ] Calibration job completes within 5 minutes for 1000 items + +### Security Requirements (Production) + +- [ ] HTTPS enabled on production server +- [ ] Environment-specific SECRET_KEY (not default "dev-secret-key") +- [ ] CORS restricted to production domains only +- [ ] WordPress JWT 
tokens stored securely (not in .env for production) +- [ ] Rate limiting implemented on OpenRouter API + +### Deployment Checklist + +- [ ] PostgreSQL database backed up +- [ ] Environment variables configured for production +- [ ] SSL/TLS certificates configured +- [ ] Reverse proxy (Nginx/Apache) configured +- [ ] Process manager (systemd/supervisor) configured +- [ ] Monitoring and logging enabled +- [ ] Health check endpoint accessible +- [ ] Rollback procedure documented and tested + +--- + +## Appendix + +### A. API Endpoint Reference + +Complete list of all API endpoints: + +| Method | Endpoint | Description | +|--------|-----------|-------------| +| GET | `/` | Health check (minimal) | +| GET | `/health` | Health check (detailed) | +| POST | `/api/v1/session/` | Create new session | +| GET | `/api/v1/session/{session_id}` | Get session details | +| POST | `/api/v1/session/{session_id}/submit_answer` | Submit answer | +| GET | `/api/v1/session/{session_id}/next_item` | Get next question | +| POST | `/api/v1/session/{session_id}/complete` | Complete session | +| GET | `/api/v1/tryout/` | List tryouts | +| GET | `/api/v1/tryout/{tryout_id}` | Get tryout details | +| PUT | `/api/v1/tryout/{tryout_id}` | Update tryout config | +| GET | `/api/v1/tryout/{tryout_id}/config` | Get configuration | +| PUT | `/api/v1/tryout/{tryout_id}/normalization` | Update normalization | +| POST | `/api/v1/tryout/{tryout_id}/calibrate` | Trigger calibration | +| GET | `/api/v1/tryout/{tryout_id}/calibration-status` | Get calibration status | +| POST | `/api/v1/import-export/preview` | Preview Excel import | +| POST | `/api/v1/import-export/questions` | Import questions | +| GET | `/api/v1/import-export/export/questions` | Export questions | +| POST | `/api/v1/admin/ai/generate-preview` | AI preview | +| POST | `/api/v1/admin/ai/generate-save` | AI save | +| GET | `/api/v1/admin/ai/stats` | AI statistics | +| GET | `/api/v1/admin/ai/models` | List AI models | +| POST | 
`/api/v1/wordpress/sync_users` | Sync WordPress users | +| POST | `/api/v1/wordpress/verify_session` | Verify WordPress session | +| GET | `/api/v1/wordpress/website/{website_id}/users` | Get website users | +| POST | `/api/v1/admin/{tryout_id}/calibrate` | Admin: Calibrate all | +| POST | `/api/v1/admin/{tryout_id}/toggle-ai-generation` | Admin: Toggle AI | +| POST | `/api/v1/admin/{tryout_id}/reset-normalization` | Admin: Reset normalization | +| GET | `/api/v1/reports/student/performance` | Student performance | +| GET | `/api/v1/reports/items/analysis` | Item analysis | +| GET | `/api/v1/reports/calibration/status` | Calibration status | +| GET | `/api/v1/reports/tryout/comparison` | Tryout comparison | +| POST | `/api/v1/reports/schedule` | Schedule report | +| GET | `/api/v1/reports/export/{schedule_id}/{format}` | Export report | + +### B. Database Schema Reference + +**Tables:** +- `websites` - WordPress site configuration +- `users` - WordPress user mapping +- `tryouts` - Tryout configuration and metadata +- `items` - Questions with CTT/IRT parameters +- `sessions` - Student tryout attempts +- `user_answers` - Individual question responses +- `tryout_stats` - Running statistics per tryout + +**Key Relationships:** +- Websites (1) → Tryouts (N) +- Tryouts (1) → Items (N) +- Tryouts (1) → Sessions (N) +- Tryouts (1) → TryoutStats (1) +- Items (1) → UserAnswers (N) +- Sessions (1) → UserAnswers (N) +- Users (1) → Sessions (N) + +**Constraints:** +- `θ, b ∈ [-3, +3]` (IRT parameters) +- `NM, NN ∈ [0, 1000]` (score ranges) +- `ctt_p ∈ [0, 1]` (CTT difficulty) +- `bobot ∈ [0, 1]` (CTT weight) + +--- + +**Document End** + +**Status:** Ready for Testing and Validation + +**Next Steps:** +1. Complete all validation tests (Section 16) +2. Verify production readiness checklist (Section 17) +3. Deploy to production environment +4. 
Monitor performance and calibration progress + +**Contact:** For issues or questions, refer to PRD.md and project-brief.md diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..e206cc8 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,147 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts. +# this is typically a path given in POSIX (e.g. forward slashes) +# format, relative to the token %(here)s which refers to the location of this +# ini file +script_location = %(here)s/alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. for multiple paths, the path separator +# is defined by "path_separator" below. +prepend_sys_path = . + + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library. +# Any required deps can installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to /versions. 
When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "path_separator" +# below. +# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions + +# path_separator; This indicates what character is used to split lists of file +# paths, including version_locations and prepend_sys_path within configparser +# files such as alembic.ini. +# The default rendered in new alembic.ini files is "os", which uses os.pathsep +# to provide os-dependent path splitting. +# +# Note that in order to support legacy alembic.ini files, this default does NOT +# take place if path_separator is not present in alembic.ini. If this +# option is omitted entirely, fallback logic is as follows: +# +# 1. Parsing of the version_locations option falls back to using the legacy +# "version_path_separator" key, which if absent then falls back to the legacy +# behavior of splitting on spaces and/or commas. +# 2. Parsing of the prepend_sys_path option falls back to the legacy +# behavior of splitting on spaces, commas, or colons. +# +# Valid values for path_separator are: +# +# path_separator = : +# path_separator = ; +# path_separator = space +# path_separator = newline +# +# Use os.pathsep. Default configuration used for new projects. +path_separator = os + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# database URL. This is consumed by the user-maintained env.py script only. +# other means of configuring database URLs may be customized within the env.py +# file. 
+sqlalchemy.url = postgresql+asyncpg://postgres:postgres@localhost:5432/irt_bank_soal + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module +# hooks = ruff +# ruff.type = module +# ruff.module = ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Alternatively, use the exec runner to execute a binary found on your PATH +# hooks = ruff +# ruff.type = exec +# ruff.executable = ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Logging configuration. This is also consumed by the user-maintained +# env.py script only. +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARNING +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARNING +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/README b/alembic/README new file mode 100644 index 0000000..98e4f9c --- /dev/null +++ b/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 0000000..97b83d9 --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,99 @@ +""" +Alembic environment configuration for async PostgreSQL migrations. 
+ +Configures Alembic to work with SQLAlchemy async engine and models. +""" + +import asyncio +import sys +from logging.config import fileConfig + +from sqlalchemy import pool +from sqlalchemy.engine import Connection +from sqlalchemy.ext.asyncio import async_engine_from_config + +from alembic import context + +# Import models and Base +sys.path.insert(0, ".") +from app.database import Base +from app.models import * # noqa: F401, F403 + +# Import settings for database URL +from app.core.config import get_settings + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# Get settings and set database URL +settings = get_settings() +config.set_main_option("sqlalchemy.url", settings.DATABASE_URL) + +# add your model's MetaData object here +# for 'autogenerate' support +target_metadata = Base.metadata + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def do_run_migrations(connection: Connection) -> None: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +async def run_async_migrations() -> None: + """Run migrations in 'online' mode. 
+ + In this scenario we need to create an Engine + and associate a connection with the context. + """ + connectable = async_engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + async with connectable.connect() as connection: + await connection.run_sync(do_run_migrations) + + await connectable.dispose() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + asyncio.run(run_async_migrations()) + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 0000000..1101630 --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,28 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + """Upgrade schema.""" + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + """Downgrade schema.""" + ${downgrades if downgrades else "pass"} diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..26c252f --- /dev/null +++ b/app/__init__.py @@ -0,0 +1,7 @@ +""" +IRT Bank Soal - Adaptive Question Bank System + +Main application package. +""" + +__version__ = "1.0.0" diff --git a/app/admin.py b/app/admin.py new file mode 100644 index 0000000..7d747aa --- /dev/null +++ b/app/admin.py @@ -0,0 +1,625 @@ +""" +FastAPI Admin configuration for IRT Bank Soal system. 
+ +Provides admin panel for managing tryouts, items, sessions, users, and tryout stats. +Includes custom actions for calibration, AI generation toggle, and normalization reset. +""" + +from typing import Any, Dict, Optional + +from fastapi import Request +from fastapi_admin.app import app as admin_app +from fastapi_admin.resources import ( + Field, + Link, + Model, +) +from fastapi_admin.widgets import displays, inputs +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.config import get_settings +from app.database import get_db +from app.models import Item, Session, Tryout, TryoutStats, User + +settings = get_settings() + + +# ============================================================================= +# Authentication Provider +# ============================================================================= + +class AdminAuthProvider: + """ + Authentication provider for FastAPI Admin. + + Supports two modes: + 1. WordPress JWT token integration (production) + 2. Basic auth for testing (development) + """ + + async def login( + self, + username: str, + password: str, + ) -> Optional[str]: + """ + Authenticate user and return token. + + Args: + username: Username + password: Password + + Returns: + Access token if authentication successful, None otherwise + """ + # Development mode: basic auth + if settings.ENVIRONMENT == "development": + # Allow admin/admin or admin/password for testing + if (username == "admin" and password in ["admin", "password"]): + return f"dev_token_{username}" + + # Production mode: WordPress JWT token validation + # For now, return None - implement WordPress integration when needed + return None + + async def logout(self, request: Request) -> bool: + """ + Logout user. + + Args: + request: FastAPI request + + Returns: + True if logout successful + """ + return True + + async def get_current_user(self, request: Request) -> Optional[dict]: + """ + Get current authenticated user. 
+ + Args: + request: FastAPI request + + Returns: + User data if authenticated, None otherwise + """ + token = request.cookies.get("admin_token") or request.headers.get("Authorization") + + if not token: + return None + + # Development mode: validate dev token + if settings.ENVIRONMENT == "development" and token.startswith("dev_token_"): + username = token.replace("dev_token_", "") + return { + "id": 1, + "username": username, + "is_superuser": True, + } + + return None + + +# ============================================================================= +# Admin Model Resources +# ============================================================================= + +class TryoutResource(Model): + """ + Admin resource for Tryout model. + + Displays tryout configuration and provides calibration and AI generation actions. + """ + + label = "Tryouts" + model = Tryout + page_size = 20 + + # Fields to display + fields = [ + Field(name="id", label="ID", input_=inputs.Input(), display=displays.Display()), + Field(name="website_id", label="Website ID", input_=inputs.Input(), display=displays.Display()), + Field(name="tryout_id", label="Tryout ID", input_=inputs.Input(), display=displays.Display()), + Field(name="name", label="Name", input_=inputs.Input(), display=displays.Display()), + Field( + name="description", + label="Description", + input_=inputs.TextArea(), + display=displays.Display(), + ), + Field( + name="scoring_mode", + label="Scoring Mode", + input_=inputs.Select(options=["ctt", "irt", "hybrid"], default="ctt"), + display=displays.Select(choices=["ctt", "irt", "hybrid"]), + ), + Field( + name="selection_mode", + label="Selection Mode", + input_=inputs.Select(options=["fixed", "adaptive", "hybrid"], default="fixed"), + display=displays.Select(choices=["fixed", "adaptive", "hybrid"]), + ), + Field( + name="normalization_mode", + label="Normalization Mode", + input_=inputs.Select(options=["static", "dynamic", "hybrid"], default="static"), + 
display=displays.Select(choices=["static", "dynamic", "hybrid"]), + ), + Field( + name="min_sample_for_dynamic", + label="Min Sample for Dynamic", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field( + name="static_rataan", + label="Static Mean (Rataan)", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field( + name="static_sb", + label="Static Std Dev (SB)", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field( + name="ai_generation_enabled", + label="Enable AI Generation", + input_=inputs.Switch(), + display=displays.Boolean(true_text="Enabled", false_text="Disabled"), + ), + Field( + name="hybrid_transition_slot", + label="Hybrid Transition Slot", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field( + name="min_calibration_sample", + label="Min Calibration Sample", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field( + name="theta_estimation_method", + label="Theta Estimation Method", + input_=inputs.Select(options=["mle", "map", "eap"], default="mle"), + display=displays.Select(choices=["mle", "map", "eap"]), + ), + Field( + name="fallback_to_ctt_on_error", + label="Fallback to CTT on Error", + input_=inputs.Switch(), + display=displays.Boolean(true_text="Yes", false_text="No"), + ), + Field(name="created_at", label="Created At", input_=inputs.DateTime(), display=displays.DateTime()), + Field(name="updated_at", label="Updated At", input_=inputs.DateTime(), display=displays.DateTime()), + ] + + +class ItemResource(Model): + """ + Admin resource for Item model. + + Displays items with CTT and IRT parameters, and calibration status. 
+ """ + + label = "Items" + model = Item + page_size = 50 + + # Fields to display + fields = [ + Field(name="id", label="ID", input_=inputs.Input(), display=displays.Display()), + Field(name="tryout_id", label="Tryout ID", input_=inputs.Input(), display=displays.Display()), + Field(name="website_id", label="Website ID", input_=inputs.Input(), display=displays.Display()), + Field(name="slot", label="Slot", input_=inputs.Input(type="number"), display=displays.Display()), + Field( + name="level", + label="Difficulty Level", + input_=inputs.Select(options=["mudah", "sedang", "sulit"], default="sedang"), + display=displays.Display(), + ), + Field( + name="stem", + label="Question Stem", + input_=inputs.TextArea(), + display=displays.Text(maxlen=100), + ), + Field(name="options", label="Options", input_=inputs.Json(), display=displays.Json()), + Field(name="correct_answer", label="Correct Answer", input_=inputs.Input(), display=displays.Display()), + Field( + name="explanation", + label="Explanation", + input_=inputs.TextArea(), + display=displays.Text(maxlen=100), + ), + Field( + name="ctt_p", + label="CTT p-value", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field( + name="ctt_bobot", + label="CTT Bobot", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field( + name="ctt_category", + label="CTT Category", + input_=inputs.Select(options=["mudah", "sedang", "sulit"]), + display=displays.Display(), + ), + Field( + name="irt_b", + label="IRT b-parameter", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field( + name="irt_se", + label="IRT SE", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field( + name="calibrated", + label="Calibrated", + input_=inputs.Switch(), + display=displays.Boolean(true_text="Yes", false_text="No"), + ), + Field( + name="calibration_sample_size", + label="Calibration Sample Size", + input_=inputs.Input(type="number"), + 
display=displays.Display(), + ), + Field( + name="generated_by", + label="Generated By", + input_=inputs.Select(options=["manual", "ai"], default="manual"), + display=displays.Display(), + ), + Field(name="ai_model", label="AI Model", input_=inputs.Input(), display=displays.Display()), + Field( + name="basis_item_id", + label="Basis Item ID", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field(name="created_at", label="Created At", input_=inputs.DateTime(), display=displays.DateTime()), + Field(name="updated_at", label="Updated At", input_=inputs.DateTime(), display=displays.DateTime()), + ] + + +class UserResource(Model): + """ + Admin resource for User model. + + Displays WordPress users and their tryout sessions. + """ + + label = "Users" + model = User + page_size = 50 + + # Fields + fields = [ + Field(name="id", label="ID", input_=inputs.Input(), display=displays.Display()), + Field(name="wp_user_id", label="WordPress User ID", input_=inputs.Input(), display=displays.Display()), + Field(name="website_id", label="Website ID", input_=inputs.Input(), display=displays.Display()), + Field(name="created_at", label="Created At", input_=inputs.DateTime(), display=displays.DateTime()), + Field(name="updated_at", label="Updated At", input_=inputs.DateTime(), display=displays.DateTime()), + ] + + +class SessionResource(Model): + """ + Admin resource for Session model. + + Displays tryout sessions with scoring results (NM, NN, theta). 
+ """ + + label = "Sessions" + model = Session + page_size = 50 + + # Fields + fields = [ + Field(name="id", label="ID", input_=inputs.Input(), display=displays.Display()), + Field(name="session_id", label="Session ID", input_=inputs.Input(), display=displays.Display()), + Field(name="wp_user_id", label="WordPress User ID", input_=inputs.Input(), display=displays.Display()), + Field(name="website_id", label="Website ID", input_=inputs.Input(), display=displays.Display()), + Field(name="tryout_id", label="Tryout ID", input_=inputs.Input(), display=displays.Display()), + Field(name="start_time", label="Start Time", input_=inputs.DateTime(), display=displays.DateTime()), + Field(name="end_time", label="End Time", input_=inputs.DateTime(), display=displays.DateTime()), + Field( + name="is_completed", + label="Completed", + input_=inputs.Switch(), + display=displays.Boolean(true_text="Yes", false_text="No"), + ), + Field( + name="scoring_mode_used", + label="Scoring Mode Used", + input_=inputs.Select(options=["ctt", "irt", "hybrid"]), + display=displays.Display(), + ), + Field(name="total_benar", label="Total Benar", input_=inputs.Input(type="number"), display=displays.Display()), + Field(name="total_bobot_earned", label="Total Bobot Earned", input_=inputs.Input(type="number"), display=displays.Display()), + Field(name="NM", label="NM Score", input_=inputs.Input(type="number"), display=displays.Display()), + Field(name="NN", label="NN Score", input_=inputs.Input(type="number"), display=displays.Display()), + Field(name="theta", label="Theta", input_=inputs.Input(type="number"), display=displays.Display()), + Field(name="theta_se", label="Theta SE", input_=inputs.Input(type="number"), display=displays.Display()), + Field(name="rataan_used", label="Rataan Used", input_=inputs.Input(type="number"), display=displays.Display()), + Field(name="sb_used", label="SB Used", input_=inputs.Input(type="number"), display=displays.Display()), + Field(name="created_at", label="Created 
At", input_=inputs.DateTime(), display=displays.DateTime()), + Field(name="updated_at", label="Updated At", input_=inputs.DateTime(), display=displays.DateTime()), + ] + + +class TryoutStatsResource(Model): + """ + Admin resource for TryoutStats model. + + Displays tryout-level statistics and provides normalization reset action. + """ + + label = "Tryout Stats" + model = TryoutStats + page_size = 20 + + # Fields + fields = [ + Field(name="id", label="ID", input_=inputs.Input(), display=displays.Display()), + Field(name="website_id", label="Website ID", input_=inputs.Input(), display=displays.Display()), + Field(name="tryout_id", label="Tryout ID", input_=inputs.Input(), display=displays.Display()), + Field( + name="participant_count", + label="Participant Count", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field( + name="total_nm_sum", + label="Total NM Sum", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field( + name="total_nm_sq_sum", + label="Total NM Squared Sum", + input_=inputs.Input(type="number"), + display=displays.Display(), + ), + Field(name="rataan", label="Rataan", input_=inputs.Input(type="number"), display=displays.Display()), + Field(name="sb", label="SB", input_=inputs.Input(type="number"), display=displays.Display()), + Field(name="min_nm", label="Min NM", input_=inputs.Input(type="number"), display=displays.Display()), + Field(name="max_nm", label="Max NM", input_=inputs.Input(type="number"), display=displays.Display()), + Field( + name="last_calculated", + label="Last Calculated", + input_=inputs.DateTime(), + display=displays.DateTime(), + ), + Field(name="created_at", label="Created At", input_=inputs.DateTime(), display=displays.DateTime()), + Field(name="updated_at", label="Updated At", input_=inputs.DateTime(), display=displays.DateTime()), + ] + + +# ============================================================================= +# Custom Dashboard Views +# 
============================================================================= + +class CalibrationDashboardLink(Link): + """ + Link to calibration status dashboard. + + Displays calibration percentage and items awaiting calibration. + """ + + label = "Calibration Status" + icon = "fas fa-chart-line" + url = "/admin/calibration_status" + + async def get(self, request: Request) -> Dict[str, Any]: + """Get calibration status for all tryouts.""" + # Get all tryouts + db_gen = get_db() + db = await db_gen.__anext__() + + try: + result = await db.execute( + select( + Tryout.id, + Tryout.tryout_id, + Tryout.name, + ) + ) + tryouts = result.all() + + calibration_data = [] + for tryout_id, tryout_str, name in tryouts: + # Get calibration status + from app.services.irt_calibration import get_calibration_status + + status = await get_calibration_status(tryout_str, 1, db) + calibration_data.append({ + "tryout_id": tryout_str, + "name": name, + "total_items": status["total_items"], + "calibrated_items": status["calibrated_items"], + "calibration_percentage": status["calibration_percentage"], + "ready_for_irt": status["ready_for_irt"], + }) + + return { + "status": "success", + "data": calibration_data, + } + finally: + await db_gen.aclose() + + +class ItemStatisticsLink(Link): + """ + Link to item statistics view. + + Displays items grouped by difficulty level with calibration status. 
+ """ + + label = "Item Statistics" + icon = "fas fa-chart-bar" + url = "/admin/item_statistics" + + async def get(self, request: Request) -> Dict[str, Any]: + """Get item statistics grouped by difficulty level.""" + db_gen = get_db() + db = await db_gen.__anext__() + + try: + # Get items grouped by level + result = await db.execute( + select( + Item.level, + ) + .distinct() + ) + levels = result.scalars().all() + + stats = [] + for level in levels: + # Get items for this level + item_result = await db.execute( + select(Item) + .where(Item.level == level) + .order_by(Item.slot) + .limit(10) + ) + items = item_result.scalars().all() + + # Calculate average correctness rate + total_responses = sum(item.calibration_sample_size for item in items) + calibrated_count = sum(1 for item in items if item.calibrated) + + level_stats = { + "level": level, + "total_items": len(items), + "calibrated_items": calibrated_count, + "calibration_percentage": (calibrated_count / len(items) * 100) if len(items) > 0 else 0, + "total_responses": total_responses, + "avg_correctness": sum(item.ctt_p or 0 for item in items) / len(items) if len(items) > 0 else 0, + "items": [ + { + "id": item.id, + "slot": item.slot, + "calibrated": item.calibrated, + "ctt_p": item.ctt_p, + "irt_b": item.irt_b, + "calibration_sample_size": item.calibration_sample_size, + } + for item in items + ], + } + stats.append(level_stats) + + return { + "status": "success", + "data": stats, + } + finally: + await db_gen.aclose() + + +class SessionOverviewLink(Link): + """ + Link to session overview view. + + Displays sessions with scores (NM, NN, theta) and completion status. 
+ """ + + label = "Session Overview" + icon = "fas fa-users" + url = "/admin/session_overview" + + async def get(self, request: Request) -> Dict[str, Any]: + """Get session overview with filters.""" + db_gen = get_db() + db = await db_gen.__anext__() + + try: + # Get recent sessions + result = await db.execute( + select(Session) + .order_by(Session.created_at.desc()) + .limit(50) + ) + sessions = result.scalars().all() + + session_data = [ + { + "session_id": session.session_id, + "wp_user_id": session.wp_user_id, + "tryout_id": session.tryout_id, + "is_completed": session.is_completed, + "scoring_mode_used": session.scoring_mode_used, + "total_benar": session.total_benar, + "NM": session.NM, + "NN": session.NN, + "theta": session.theta, + "theta_se": session.theta_se, + "start_time": session.start_time.isoformat() if session.start_time else None, + "end_time": session.end_time.isoformat() if session.end_time else None, + } + for session in sessions + ] + + return { + "status": "success", + "data": session_data, + } + finally: + await db_gen.aclose() + + +# ============================================================================= +# Initialize FastAPI Admin +# ============================================================================= + +def create_admin_app() -> Any: + """ + Create and configure FastAPI Admin application. 
+ + Returns: + FastAPI app with admin panel + """ + # Configure admin app + admin_app.settings.logo_url = "/static/logo.png" + admin_app.settings.site_title = "IRT Bank Soal Admin" + admin_app.settings.site_description = "Admin Panel for Adaptive Question Bank System" + + # Register authentication provider + admin_app.settings.auth_provider = AdminAuthProvider() + + # Register model resources + admin_app.register(TryoutResource) + admin_app.register(ItemResource) + admin_app.register(UserResource) + admin_app.register(SessionResource) + admin_app.register(TryoutStatsResource) + + # Register dashboard links + admin_app.register(CalibrationDashboardLink) + admin_app.register(ItemStatisticsLink) + admin_app.register(SessionOverviewLink) + + return admin_app + + +# Export admin app for mounting in main.py +admin = create_admin_app() diff --git a/app/api/__init__.py b/app/api/__init__.py new file mode 100644 index 0000000..754576c --- /dev/null +++ b/app/api/__init__.py @@ -0,0 +1,5 @@ +""" +API module for IRT Bank Soal. + +Contains FastAPI routers and endpoint definitions. +""" diff --git a/app/api/v1/__init__.py b/app/api/v1/__init__.py new file mode 100644 index 0000000..7f76094 --- /dev/null +++ b/app/api/v1/__init__.py @@ -0,0 +1,25 @@ +""" +API v1 Router configuration. + +Defines all API v1 endpoints and their prefixes. +""" + +from fastapi import APIRouter + +from app.api.v1 import session + +api_router = APIRouter() + +# Include session endpoints +api_router.include_router( + session.router, + prefix="/session", + tags=["session"] +) + +# Include admin endpoints +api_router.include_router( + session.admin_router, + prefix="/admin", + tags=["admin"] +) diff --git a/app/api/v1/session.py b/app/api/v1/session.py new file mode 100644 index 0000000..9da4945 --- /dev/null +++ b/app/api/v1/session.py @@ -0,0 +1,388 @@ +""" +Session API endpoints for CAT item selection. 
"""
Session API endpoints for adaptive (CAT) test delivery.

Provides endpoints for:
- GET  /api/v1/session/{session_id}/next_item     - Get next question
- POST /api/v1/session/{session_id}/submit_answer - Submit an answer
- POST /api/v1/admin/cat/test                     - Admin playground for testing CAT
- GET  /api/v1/admin/session/{session_id}/status  - Admin session monitoring
"""

from typing import Literal, Optional

from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel, Field
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession

from app.database import get_db
from app.models import Item, Session, Tryout, UserAnswer
from app.services.cat_selection import (
    CATSelectionError,
    get_next_item,
    should_terminate,
    simulate_cat_selection,
    update_theta,
)

# Default SE threshold for termination when a tryout does not override it.
DEFAULT_SE_THRESHOLD = 0.5

# Session router for student-facing endpoints.
router = APIRouter()

# Admin router for admin-only endpoints (CAT playground, monitoring).
admin_router = APIRouter()


# ============== Request/Response Models ==============

class NextItemResponse(BaseModel):
    """Response for the next-item endpoint.

    ``status`` is ``"item"`` when a question is attached and ``"completed"``
    when the session has terminated (see ``reason`` for why).
    """

    status: Literal["item", "completed"] = "item"
    item_id: Optional[int] = None
    stem: Optional[str] = None
    options: Optional[dict] = None
    slot: Optional[int] = None
    level: Optional[str] = None
    selection_method: Optional[str] = None
    reason: Optional[str] = None
    current_theta: Optional[float] = None
    current_se: Optional[float] = None
    items_answered: Optional[int] = None


class SubmitAnswerRequest(BaseModel):
    """Request for submitting an answer."""

    item_id: int = Field(..., description="Item ID being answered")
    response: str = Field(..., description="User's answer (A, B, C, D)")
    time_spent: int = Field(default=0, ge=0, description="Time spent on question (seconds)")


class SubmitAnswerResponse(BaseModel):
    """Response for submitting an answer."""

    is_correct: bool
    correct_answer: str
    explanation: Optional[str] = None
    theta: Optional[float] = None
    theta_se: Optional[float] = None


class CATTestRequest(BaseModel):
    """Request for admin CAT test endpoint."""

    tryout_id: str = Field(..., description="Tryout identifier")
    website_id: int = Field(..., description="Website identifier")
    initial_theta: float = Field(default=0.0, ge=-3.0, le=3.0, description="Initial theta value")
    selection_mode: Literal["fixed", "adaptive", "hybrid"] = Field(
        default="adaptive", description="Selection mode"
    )
    max_items: int = Field(default=15, ge=1, le=100, description="Maximum items to simulate")
    se_threshold: float = Field(
        default=0.5, ge=0.1, le=3.0, description="SE threshold for termination"
    )
    hybrid_transition_slot: int = Field(
        default=10, ge=1, description="Slot to transition in hybrid mode"
    )


class CATTestResponse(BaseModel):
    """Response for admin CAT test endpoint."""

    tryout_id: str
    website_id: int
    initial_theta: float
    selection_mode: str
    total_items: int
    final_theta: float
    final_se: float
    se_threshold_met: bool
    items: list


# ============== Session Endpoints ==============

async def _get_session_or_404(db: AsyncSession, session_id: str) -> Session:
    """Fetch a session by its public identifier, raising HTTP 404 if absent."""
    result = await db.execute(select(Session).where(Session.session_id == session_id))
    session = result.scalar_one_or_none()
    if session is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Session {session_id} not found"
        )
    return session


@router.get(
    "/{session_id}/next_item",
    response_model=NextItemResponse,
    summary="Get next item for session",
    description="Returns the next question for a session based on the tryout's selection mode."
)
async def get_next_item_endpoint(
    session_id: str,
    db: AsyncSession = Depends(get_db)
) -> NextItemResponse:
    """
    Get the next item for a session.

    Validates the session, loads the tryout configuration
    (selection_mode, hybrid transition, AI flag), checks the termination
    criteria, then delegates to the configured selection strategy.

    Raises:
        HTTPException: 404 if the session or tryout is missing,
            500 if item selection fails.
    """
    session = await _get_session_or_404(db, session_id)

    if session.is_completed:
        return NextItemResponse(
            status="completed",
            reason="Session already completed"
        )

    # Get tryout config.
    tryout_result = await db.execute(
        select(Tryout).where(
            Tryout.tryout_id == session.tryout_id,
            Tryout.website_id == session.website_id,
        )
    )
    tryout = tryout_result.scalar_one_or_none()
    if tryout is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tryout {session.tryout_id} not found"
        )

    # Check termination conditions (item count / SE threshold).
    # NOTE(review): Tryout has no max_items field yet, so None means
    # "no item-count cap" here -- confirm against product requirements.
    termination = await should_terminate(
        db,
        session_id,
        max_items=None,
        se_threshold=DEFAULT_SE_THRESHOLD
    )

    if termination.should_terminate:
        return NextItemResponse(
            status="completed",
            reason=termination.reason,
            current_theta=session.theta,
            current_se=session.theta_se,
            items_answered=termination.items_answered
        )

    # Get next item based on selection mode.
    try:
        result = await get_next_item(
            db,
            session_id,
            selection_mode=tryout.selection_mode,
            hybrid_transition_slot=tryout.hybrid_transition_slot or 10,
            ai_generation_enabled=tryout.ai_generation_enabled
        )
    except CATSelectionError as e:
        # Chain the cause so the selection failure is preserved in logs.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        ) from e

    if result.item is None:
        return NextItemResponse(
            status="completed",
            reason=result.reason,
            current_theta=session.theta,
            current_se=session.theta_se,
            items_answered=termination.items_answered
        )

    item = result.item
    return NextItemResponse(
        status="item",
        item_id=item.id,
        stem=item.stem,
        options=item.options,
        slot=item.slot,
        level=item.level,
        selection_method=result.selection_method,
        reason=result.reason,
        current_theta=session.theta,
        current_se=session.theta_se,
        items_answered=termination.items_answered
    )


@router.post(
    "/{session_id}/submit_answer",
    response_model=SubmitAnswerResponse,
    summary="Submit answer for item",
    description="Submit an answer for an item and update theta estimate."
)
async def submit_answer_endpoint(
    session_id: str,
    request: SubmitAnswerRequest,
    db: AsyncSession = Depends(get_db)
) -> SubmitAnswerResponse:
    """
    Submit an answer for an item.

    Validates session and item, verifies the item belongs to the session's
    tryout/website, grades the answer, updates the theta estimate, and
    persists the response record.

    Raises:
        HTTPException: 404 if session or item is missing; 400 if the session
            is already completed or the item belongs to another tryout/site.
    """
    session = await _get_session_or_404(db, session_id)

    if session.is_completed:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Session already completed"
        )

    # Get item.
    item_result = await db.execute(select(Item).where(Item.id == request.item_id))
    item = item_result.scalar_one_or_none()
    if item is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Item {request.item_id} not found"
        )

    # Multi-site isolation: reject items from another tryout or website,
    # otherwise an answer could be recorded (and theta updated) against an
    # item the student was never served.
    if item.tryout_id != session.tryout_id or item.website_id != session.website_id:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Item {request.item_id} does not belong to this session's tryout"
        )

    # Grade (case-insensitive; the stored response is normalized to upper).
    is_correct = request.response.upper() == item.correct_answer.upper()

    # Update the IRT ability estimate with this response.
    theta, theta_se = await update_theta(db, session_id, request.item_id, is_correct)

    # TODO(review): duplicate submissions for the same item are not rejected;
    # confirm whether resubmission should be an error.
    user_answer = UserAnswer(
        session_id=session_id,
        wp_user_id=session.wp_user_id,
        website_id=session.website_id,
        tryout_id=session.tryout_id,
        item_id=request.item_id,
        response=request.response.upper(),
        is_correct=is_correct,
        time_spent=request.time_spent,
        scoring_mode_used=session.scoring_mode_used,
        # CTT weight is earned only when correct; 0.0 if the item has no
        # calibrated bobot yet.
        bobot_earned=item.ctt_bobot if is_correct and item.ctt_bobot else 0.0
    )
    db.add(user_answer)
    await db.commit()

    return SubmitAnswerResponse(
        is_correct=is_correct,
        correct_answer=item.correct_answer,
        explanation=item.explanation,
        theta=theta,
        theta_se=theta_se
    )


# ============== Admin Endpoints ==============

@admin_router.post(
    "/cat/test",
    response_model=CATTestResponse,
    summary="Test CAT selection algorithm",
    description="Admin playground for testing adaptive selection behavior."
)
async def test_cat_endpoint(
    request: CATTestRequest,
    db: AsyncSession = Depends(get_db)
) -> CATTestResponse:
    """
    Test the CAT selection algorithm.

    Simulates CAT selection for a tryout and returns the sequence of
    selected items with the theta progression.

    Raises:
        HTTPException: 404 if the tryout is missing; 400 if the simulation
            reports an error.
    """
    # Verify tryout exists.
    tryout_result = await db.execute(
        select(Tryout).where(
            Tryout.tryout_id == request.tryout_id,
            Tryout.website_id == request.website_id,
        )
    )
    if tryout_result.scalar_one_or_none() is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tryout {request.tryout_id} not found for website {request.website_id}"
        )

    # Run simulation.
    result = await simulate_cat_selection(
        db,
        tryout_id=request.tryout_id,
        website_id=request.website_id,
        initial_theta=request.initial_theta,
        selection_mode=request.selection_mode,
        max_items=request.max_items,
        se_threshold=request.se_threshold,
        hybrid_transition_slot=request.hybrid_transition_slot
    )

    if "error" in result:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=result["error"]
        )

    return CATTestResponse(**result)


@admin_router.get(
    "/session/{session_id}/status",
    summary="Get session status",
    description="Get detailed session status including theta and SE."
)
async def get_session_status_endpoint(
    session_id: str,
    db: AsyncSession = Depends(get_db)
) -> dict:
    """Return a session snapshot (scores, theta, answer count) for admin monitoring."""
    session = await _get_session_or_404(db, session_id)

    # Count answers recorded for this session.
    count_result = await db.execute(
        select(func.count(UserAnswer.id)).where(UserAnswer.session_id == session_id)
    )
    items_answered = count_result.scalar() or 0

    return {
        "session_id": session.session_id,
        "wp_user_id": session.wp_user_id,
        "tryout_id": session.tryout_id,
        "is_completed": session.is_completed,
        "theta": session.theta,
        "theta_se": session.theta_se,
        "items_answered": items_answered,
        "scoring_mode_used": session.scoring_mode_used,
        "NM": session.NM,
        "NN": session.NN,
        "start_time": session.start_time.isoformat() if session.start_time else None,
        "end_time": session.end_time.isoformat() if session.end_time else None
    }
+""" + +from typing import Literal, List, Union + +from pydantic import Field, field_validator +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + """Application settings loaded from environment variables.""" + + model_config = SettingsConfigDict( + env_file=".env", + env_file_encoding="utf-8", + case_sensitive=False, + ) + + # Database + DATABASE_URL: str = Field( + default="postgresql+asyncpg://postgres:postgres@localhost:5432/irt_bank_soal", + description="PostgreSQL database URL with asyncpg driver", + ) + + # FastAPI + SECRET_KEY: str = Field( + default="dev-secret-key-change-in-production", + description="Secret key for JWT token signing", + ) + API_V1_STR: str = Field(default="/api/v1", description="API v1 prefix") + PROJECT_NAME: str = Field(default="IRT Bank Soal", description="Project name") + ENVIRONMENT: Literal["development", "staging", "production"] = Field( + default="development", description="Environment name" + ) + + # OpenRouter (AI Generation) + OPENROUTER_API_KEY: str = Field( + default="", description="OpenRouter API key for AI generation" + ) + OPENROUTER_MODEL_QWEN: str = Field( + default="qwen/qwen-2.5-coder-32b-instruct", + description="Qwen model identifier", + ) + OPENROUTER_MODEL_LLAMA: str = Field( + default="meta-llama/llama-3.3-70b-instruct", + description="Llama model identifier", + ) + OPENROUTER_TIMEOUT: int = Field(default=30, description="OpenRouter API timeout in seconds") + + # WordPress Integration + WORDPRESS_API_URL: str = Field( + default="", description="WordPress REST API base URL" + ) + WORDPRESS_AUTH_TOKEN: str = Field( + default="", description="WordPress JWT authentication token" + ) + + # Redis (Celery) + REDIS_URL: str = Field( + default="redis://localhost:6379/0", description="Redis connection URL" + ) + CELERY_BROKER_URL: str = Field( + default="redis://localhost:6379/0", description="Celery broker URL" + ) + CELERY_RESULT_BACKEND: str = Field( + 
default="redis://localhost:6379/0", description="Celery result backend URL" + ) + + # CORS - stored as list, accepts comma-separated string from env + ALLOWED_ORIGINS: List[str] = Field( + default=["http://localhost:3000"], + description="List of allowed CORS origins", + ) + + @field_validator("ALLOWED_ORIGINS", mode="before") + @classmethod + def parse_allowed_origins(cls, v: Union[str, List[str]]) -> List[str]: + """Parse comma-separated origins into list.""" + if isinstance(v, str): + return [origin.strip() for origin in v.split(",") if origin.strip()] + return v + + +# Global settings instance +_settings: Union[Settings, None] = None + + +def get_settings() -> Settings: + """ + Get application settings instance. + + Returns: + Settings: Application settings + + Raises: + ValueError: If settings not initialized + """ + global _settings + if _settings is None: + _settings = Settings() + return _settings + + +def init_settings(settings: Settings) -> None: + """ + Initialize settings with custom instance (useful for testing). + + Args: + settings: Settings instance to use + """ + global _settings + _settings = settings diff --git a/app/database.py b/app/database.py new file mode 100644 index 0000000..c9c41f0 --- /dev/null +++ b/app/database.py @@ -0,0 +1,85 @@ +""" +Database configuration and session management for async PostgreSQL. + +Uses SQLAlchemy 2.0 async ORM with asyncpg driver. 
+""" + +from typing import AsyncGenerator + +from sqlalchemy.ext.asyncio import ( + AsyncSession, + async_sessionmaker, + create_async_engine, +) +from sqlalchemy.orm import DeclarativeBase + +from app.core.config import get_settings + +settings = get_settings() + +# Create async engine with connection pooling +engine = create_async_engine( + settings.DATABASE_URL, + echo=settings.ENVIRONMENT == "development", # Log SQL in development + pool_pre_ping=True, # Verify connections before using + pool_size=10, # Number of connections to maintain + max_overflow=20, # Max additional connections beyond pool_size + pool_recycle=3600, # Recycle connections after 1 hour +) + +# Create async session factory +AsyncSessionLocal = async_sessionmaker( + engine, + class_=AsyncSession, + expire_on_commit=False, # Prevent attributes from being expired after commit + autocommit=False, + autoflush=False, +) + + +class Base(DeclarativeBase): + """Base class for all database models.""" + + pass + + +async def get_db() -> AsyncGenerator[AsyncSession, None]: + """ + Dependency for getting async database session. + + Yields: + AsyncSession: Database session + + Example: + ```python + @app.get("/items/") + async def get_items(db: AsyncSession = Depends(get_db)): + result = await db.execute(select(Item)) + return result.scalars().all() + ``` + """ + async with AsyncSessionLocal() as session: + try: + yield session + await session.commit() + except Exception: + await session.rollback() + raise + finally: + await session.close() + + +async def init_db() -> None: + """ + Initialize database - create all tables. + + Note: In production, use Alembic migrations instead. + This is useful for development and testing. 
+ """ + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + + +async def close_db() -> None: + """Close database connections.""" + await engine.dispose() diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..277c528 --- /dev/null +++ b/app/main.py @@ -0,0 +1,204 @@ +""" +IRT Bank Soal - Adaptive Question Bank System + +Main FastAPI application entry point. + +Features: +- CTT (Classical Test Theory) scoring with exact Excel formulas +- IRT (Item Response Theory) support for adaptive testing +- Multi-website support for WordPress integration +- AI-powered question generation +""" + +from contextlib import asynccontextmanager +from typing import AsyncGenerator + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +from app.admin import admin as admin_app +from app.core.config import get_settings +from app.database import close_db, init_db +from app.routers import ( + admin_router, + ai_router, + import_export_router, + reports_router, + sessions_router, + tryouts_router, + wordpress_router, +) + +settings = get_settings() + + +@asynccontextmanager +async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: + """ + Application lifespan manager. + + Handles startup and shutdown events. + """ + # Startup: Initialize database + await init_db() + + yield + + # Shutdown: Close database connections + await close_db() + + +# Initialize FastAPI application +app = FastAPI( + title="IRT Bank Soal", + description=""" +## Adaptive Question Bank System with IRT/CTT Scoring + +This API provides a comprehensive backend for adaptive assessment systems. 
# CORS configuration. The Settings validator normally delivers a parsed
# list, but accept a raw comma-separated string as a defensive fallback.
origins = settings.ALLOWED_ORIGINS
if isinstance(origins, str):
    origins = [o.strip() for o in origins.split(",") if o.strip()]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get(
    "/",
    summary="Health check",
    description="Returns API status and version information.",
    tags=["health"],
)
async def root():
    """Lightweight liveness probe for load balancers and monitoring."""
    return {
        "status": "healthy",
        "service": "IRT Bank Soal",
        "version": "1.0.0",
        "docs": "/docs",
    }


@app.get(
    "/health",
    summary="Detailed health check",
    description="Returns detailed health status including database connectivity.",
    tags=["health"],
)
async def health_check():
    """Readiness probe: verifies the database answers a trivial query."""
    from sqlalchemy import text

    from app.database import engine

    db_status = "unknown"
    try:
        async with engine.connect() as conn:
            await conn.execute(text("SELECT 1"))
        db_status = "connected"
    except Exception as e:
        db_status = f"error: {str(e)}"

    return {
        "status": "healthy" if db_status == "connected" else "degraded",
        "service": "IRT Bank Soal",
        "version": "1.0.0",
        "database": db_status,
        "environment": settings.ENVIRONMENT,
    }


# Import/export router is mounted without the version prefix.
app.include_router(import_export_router)

# Versioned API routers, registered in their original order.
for versioned_router in (
    sessions_router,
    tryouts_router,
    wordpress_router,
    ai_router,
    reports_router,
):
    app.include_router(versioned_router, prefix=settings.API_V1_STR)

# FastAPI-Admin panel UI.
app.mount("/admin", admin_app)

# Admin API router for custom actions.
app.include_router(admin_router, prefix=settings.API_V1_STR)


# Placeholder routers for future implementation
# These will be implemented in subsequent phases

# app.include_router(
#     items_router,
#     prefix=f"{settings.API_V1_STR}",
#     tags=["items"],
# )


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=8000,
        reload=settings.ENVIRONMENT == "development",
    )
+""" + +from app.database import Base +from app.models.item import Item +from app.models.session import Session +from app.models.tryout import Tryout +from app.models.tryout_stats import TryoutStats +from app.models.user import User +from app.models.user_answer import UserAnswer +from app.models.website import Website + +__all__ = [ + "Base", + "User", + "Website", + "Tryout", + "Item", + "Session", + "UserAnswer", + "TryoutStats", +] diff --git a/app/models/item.py b/app/models/item.py new file mode 100644 index 0000000..bb863f9 --- /dev/null +++ b/app/models/item.py @@ -0,0 +1,222 @@ +""" +Item model for questions with CTT and IRT parameters. + +Represents individual questions with both classical test theory (CTT) +and item response theory (IRT) parameters. +""" + +from datetime import datetime +from typing import Literal, Union + +from sqlalchemy import ( + Boolean, + CheckConstraint, + DateTime, + Float, + ForeignKey, + Index, + Integer, + JSON, + String, + Text, +) +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from app.database import Base + + +class Item(Base): + """ + Item model representing individual questions. + + Supports both CTT (p, bobot, category) and IRT (b, se) parameters. + Tracks AI generation metadata and calibration status. 
+ + Attributes: + id: Primary key + tryout_id: Tryout identifier + website_id: Website identifier + slot: Question position in tryout + level: Difficulty level (mudah, sedang, sulit) + stem: Question text + options: JSON array of answer options + correct_answer: Correct option (A, B, C, D) + explanation: Answer explanation + ctt_p: CTT difficulty (proportion correct) + ctt_bobot: CTT weight (1 - p) + ctt_category: CTT difficulty category + irt_b: IRT difficulty parameter [-3, +3] + irt_se: IRT standard error + calibrated: Calibration status + calibration_sample_size: Sample size for calibration + generated_by: Generation source (manual, ai) + ai_model: AI model used (if generated by AI) + basis_item_id: Original item ID (for AI variants) + created_at: Record creation timestamp + updated_at: Record update timestamp + tryout: Tryout relationship + user_answers: User responses to this item + """ + + __tablename__ = "items" + + # Primary key + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) + + # Foreign keys + tryout_id: Mapped[str] = mapped_column( + String(255), nullable=False, index=True, comment="Tryout identifier" + ) + website_id: Mapped[int] = mapped_column( + ForeignKey("websites.id", ondelete="CASCADE", onupdate="CASCADE"), + nullable=False, + index=True, + comment="Website identifier", + ) + + # Position and difficulty + slot: Mapped[int] = mapped_column( + Integer, nullable=False, comment="Question position in tryout" + ) + level: Mapped[Literal["mudah", "sedang", "sulit"]] = mapped_column( + String(50), nullable=False, comment="Difficulty level" + ) + + # Question content + stem: Mapped[str] = mapped_column(Text, nullable=False, comment="Question text") + options: Mapped[dict] = mapped_column( + JSON, + nullable=False, + comment="JSON object with options (e.g., {\"A\": \"option1\", \"B\": \"option2\"})", + ) + correct_answer: Mapped[str] = mapped_column( + String(10), nullable=False, comment="Correct option (A, B, C, D)" + ) + 
explanation: Mapped[Union[str, None]] = mapped_column( + Text, nullable=True, comment="Answer explanation" + ) + + # CTT parameters + ctt_p: Mapped[Union[float, None]] = mapped_column( + Float, + nullable=True, + comment="CTT difficulty (proportion correct)", + ) + ctt_bobot: Mapped[Union[float, None]] = mapped_column( + Float, + nullable=True, + comment="CTT weight (1 - p)", + ) + ctt_category: Mapped[Union[Literal["mudah", "sedang", "sulit"], None]] = mapped_column( + String(50), + nullable=True, + comment="CTT difficulty category", + ) + + # IRT parameters (1PL Rasch model) + irt_b: Mapped[Union[float, None]] = mapped_column( + Float, + nullable=True, + comment="IRT difficulty parameter [-3, +3]", + ) + irt_se: Mapped[Union[float, None]] = mapped_column( + Float, + nullable=True, + comment="IRT standard error", + ) + + # Calibration status + calibrated: Mapped[bool] = mapped_column( + Boolean, nullable=False, default=False, comment="Calibration status" + ) + calibration_sample_size: Mapped[int] = mapped_column( + Integer, + nullable=False, + default=0, + comment="Sample size for calibration", + ) + + # AI generation metadata + generated_by: Mapped[Literal["manual", "ai"]] = mapped_column( + String(50), + nullable=False, + default="manual", + comment="Generation source", + ) + ai_model: Mapped[Union[str, None]] = mapped_column( + String(255), + nullable=True, + comment="AI model used (if generated by AI)", + ) + basis_item_id: Mapped[Union[int, None]] = mapped_column( + ForeignKey("items.id", ondelete="SET NULL", onupdate="CASCADE"), + nullable=True, + index=True, + comment="Original item ID (for AI variants)", + ) + + # Timestamps + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), nullable=False, server_default="NOW()" + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + server_default="NOW()", + onupdate="NOW()", + ) + + # Relationships + tryout: Mapped["Tryout"] = relationship( + "Tryout", 
back_populates="items", lazy="selectin" + ) + user_answers: Mapped[list["UserAnswer"]] = relationship( + "UserAnswer", back_populates="item", lazy="selectin", cascade="all, delete-orphan" + ) + basis_item: Mapped[Union["Item", None]] = relationship( + "Item", + remote_side=[id], + back_populates="variants", + lazy="selectin", + single_parent=True, + ) + variants: Mapped[list["Item"]] = relationship( + "Item", + back_populates="basis_item", + lazy="selectin", + cascade="all, delete-orphan", + ) + + # Constraints and indexes + __table_args__ = ( + Index( + "ix_items_tryout_id_website_id_slot", + "tryout_id", + "website_id", + "slot", + "level", + unique=True, + ), + Index("ix_items_calibrated", "calibrated"), + Index("ix_items_basis_item_id", "basis_item_id"), + # IRT b parameter constraint [-3, +3] + CheckConstraint( + "irt_b IS NULL OR (irt_b >= -3 AND irt_b <= 3)", + "ck_irt_b_range", + ), + # CTT p constraint [0, 1] + CheckConstraint( + "ctt_p IS NULL OR (ctt_p >= 0 AND ctt_p <= 1)", + "ck_ctt_p_range", + ), + # CTT bobot constraint [0, 1] + CheckConstraint( + "ctt_bobot IS NULL OR (ctt_bobot >= 0 AND ctt_bobot <= 1)", + "ck_ctt_bobot_range", + ), + # Slot must be positive + CheckConstraint("slot > 0", "ck_slot_positive"), + ) + + def __repr__(self) -> str: + return f"" diff --git a/app/models/session.py b/app/models/session.py new file mode 100644 index 0000000..6194dca --- /dev/null +++ b/app/models/session.py @@ -0,0 +1,193 @@ +""" +Session model for tryout attempt tracking. + +Represents a student's attempt at a tryout with scoring information. +""" + +from datetime import datetime +from typing import Literal, Union + +from sqlalchemy import ( + Boolean, + CheckConstraint, + DateTime, + Float, + ForeignKey, + Index, + Integer, + String, +) +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from app.database import Base + + +class Session(Base): + """ + Session model representing a student's tryout attempt. 
+ + Tracks session metadata, scoring results, and IRT estimates. + + Attributes: + id: Primary key + session_id: Unique session identifier + wp_user_id: WordPress user ID + website_id: Website identifier + tryout_id: Tryout identifier + start_time: Session start timestamp + end_time: Session end timestamp + is_completed: Completion status + scoring_mode_used: Scoring mode used for this session + total_benar: Total correct answers + total_bobot_earned: Total weight earned + NM: Nilai Mentah (raw score) [0, 1000] + NN: Nilai Nasional (normalized score) [0, 1000] + theta: IRT ability estimate [-3, +3] + theta_se: IRT standard error + rataan_used: Mean value used for normalization + sb_used: Standard deviation used for normalization + created_at: Record creation timestamp + updated_at: Record update timestamp + user: User relationship + tryout: Tryout relationship + user_answers: User's responses in this session + """ + + __tablename__ = "sessions" + + # Primary key + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) + + # Session identifier (globally unique) + session_id: Mapped[str] = mapped_column( + String(255), + nullable=False, + unique=True, + index=True, + comment="Unique session identifier", + ) + + # Foreign keys + wp_user_id: Mapped[str] = mapped_column( + String(255), nullable=False, index=True, comment="WordPress user ID" + ) + website_id: Mapped[int] = mapped_column( + ForeignKey("websites.id", ondelete="CASCADE", onupdate="CASCADE"), + nullable=False, + index=True, + comment="Website identifier", + ) + tryout_id: Mapped[str] = mapped_column( + String(255), nullable=False, index=True, comment="Tryout identifier" + ) + + # Timestamps + start_time: Mapped[datetime] = mapped_column( + DateTime(timezone=True), nullable=False, server_default="NOW()" + ) + end_time: Mapped[Union[datetime, None]] = mapped_column( + DateTime(timezone=True), nullable=True, comment="Session end timestamp" + ) + is_completed: Mapped[bool] = mapped_column( + 
Boolean, nullable=False, default=False, comment="Completion status" + ) + + # Scoring metadata + scoring_mode_used: Mapped[Literal["ctt", "irt", "hybrid"]] = mapped_column( + String(50), + nullable=False, + comment="Scoring mode used for this session", + ) + + # CTT scoring results + total_benar: Mapped[int] = mapped_column( + Integer, nullable=False, default=0, comment="Total correct answers" + ) + total_bobot_earned: Mapped[float] = mapped_column( + Float, nullable=False, default=0.0, comment="Total weight earned" + ) + NM: Mapped[Union[int, None]] = mapped_column( + Integer, + nullable=True, + comment="Nilai Mentah (raw score) [0, 1000]", + ) + NN: Mapped[Union[int, None]] = mapped_column( + Integer, + nullable=True, + comment="Nilai Nasional (normalized score) [0, 1000]", + ) + + # IRT scoring results + theta: Mapped[Union[float, None]] = mapped_column( + Float, + nullable=True, + comment="IRT ability estimate [-3, +3]", + ) + theta_se: Mapped[Union[float, None]] = mapped_column( + Float, + nullable=True, + comment="IRT standard error", + ) + + # Normalization metadata + rataan_used: Mapped[Union[float, None]] = mapped_column( + Float, + nullable=True, + comment="Mean value used for normalization", + ) + sb_used: Mapped[Union[float, None]] = mapped_column( + Float, + nullable=True, + comment="Standard deviation used for normalization", + ) + + # Timestamps + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), nullable=False, server_default="NOW()" + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + server_default="NOW()", + onupdate="NOW()", + ) + + # Relationships + user: Mapped["User"] = relationship( + "User", back_populates="sessions", lazy="selectin" + ) + tryout: Mapped["Tryout"] = relationship( + "Tryout", back_populates="sessions", lazy="selectin" + ) + user_answers: Mapped[list["UserAnswer"]] = relationship( + "UserAnswer", back_populates="session", lazy="selectin", cascade="all, 
"""
Tryout model with configuration for assessment sessions.

Represents tryout exams with configurable scoring, selection, and normalization modes.
"""

from datetime import datetime
from typing import Literal, Union

from sqlalchemy import (
    Boolean,
    CheckConstraint,
    DateTime,
    Float,
    ForeignKey,
    Index,
    Integer,
    String,
    func,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.database import Base


class Tryout(Base):
    """
    Tryout model with configuration for assessment sessions.

    Supports multiple scoring modes (CTT, IRT, hybrid), selection strategies
    (fixed, adaptive, hybrid), and normalization modes (static, dynamic, hybrid).

    Attributes:
        id: Primary key
        website_id: Website identifier
        tryout_id: Tryout identifier (unique per website)
        name: Tryout name
        description: Tryout description
        scoring_mode: Scoring algorithm (ctt, irt, hybrid)
        selection_mode: Item selection strategy (fixed, adaptive, hybrid)
        normalization_mode: Normalization method (static, dynamic, hybrid)
        min_sample_for_dynamic: Minimum sample size for dynamic normalization
        static_rataan: Static mean value for manual normalization
        static_sb: Static standard deviation for manual normalization
        ai_generation_enabled: Enable/disable AI question generation
        hybrid_transition_slot: Slot number to transition from fixed to adaptive
        min_calibration_sample: Minimum responses needed for IRT calibration
        theta_estimation_method: Method for estimating theta (mle, map, eap)
        fallback_to_ctt_on_error: Fallback to CTT if IRT fails
        created_at: Record creation timestamp
        updated_at: Record update timestamp
        website: Website relationship
        items: Items in this tryout
        sessions: Sessions for this tryout
        stats: Tryout statistics
    """

    __tablename__ = "tryouts"

    # Primary key
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)

    # Foreign keys
    website_id: Mapped[int] = mapped_column(
        ForeignKey("websites.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
        index=True,
        comment="Website identifier",
    )

    # Tryout identifier (uniqueness per website enforced by the composite
    # index in __table_args__ below)
    tryout_id: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
        index=True,
        comment="Tryout identifier (unique per website)",
    )

    # Basic information
    name: Mapped[str] = mapped_column(
        String(255), nullable=False, comment="Tryout name"
    )
    description: Mapped[Union[str, None]] = mapped_column(
        String(1000), nullable=True, comment="Tryout description"
    )

    # Scoring mode: ctt (Classical Test Theory), irt (Item Response Theory), hybrid
    scoring_mode: Mapped[Literal["ctt", "irt", "hybrid"]] = mapped_column(
        String(50), nullable=False, default="ctt", comment="Scoring mode"
    )

    # Selection mode: fixed (slot order), adaptive (CAT), hybrid (mixed)
    selection_mode: Mapped[Literal["fixed", "adaptive", "hybrid"]] = mapped_column(
        String(50), nullable=False, default="fixed", comment="Item selection mode"
    )

    # Normalization mode: static (hardcoded), dynamic (real-time), hybrid
    normalization_mode: Mapped[Literal["static", "dynamic", "hybrid"]] = mapped_column(
        String(50), nullable=False, default="static", comment="Normalization mode"
    )

    # Normalization settings
    min_sample_for_dynamic: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=100,
        comment="Minimum sample size for dynamic normalization",
    )
    static_rataan: Mapped[float] = mapped_column(
        Float,
        nullable=False,
        default=500.0,
        comment="Static mean value for manual normalization",
    )
    static_sb: Mapped[float] = mapped_column(
        Float,
        nullable=False,
        default=100.0,
        comment="Static standard deviation for manual normalization",
    )

    # AI generation settings
    ai_generation_enabled: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
        comment="Enable/disable AI question generation",
    )

    # Hybrid mode settings
    hybrid_transition_slot: Mapped[Union[int, None]] = mapped_column(
        Integer,
        nullable=True,
        comment="Slot number to transition from fixed to adaptive (hybrid mode)",
    )

    # IRT settings
    min_calibration_sample: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=100,
        comment="Minimum responses needed for IRT calibration",
    )
    theta_estimation_method: Mapped[Literal["mle", "map", "eap"]] = mapped_column(
        String(50),
        nullable=False,
        default="mle",
        comment="Method for estimating theta",
    )
    fallback_to_ctt_on_error: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=True,
        comment="Fallback to CTT if IRT fails",
    )

    # Timestamps.
    # FIX: use func.now() instead of the plain string "NOW()".  SQLAlchemy
    # renders a plain-string server_default as a quoted literal ('NOW()'),
    # and a plain-string onupdate assigns the literal text "NOW()" to the
    # column on UPDATE; func.now() emits the SQL NOW() function in both cases.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Relationships
    website: Mapped["Website"] = relationship(
        "Website", back_populates="tryouts", lazy="selectin"
    )
    items: Mapped[list["Item"]] = relationship(
        "Item", back_populates="tryout", lazy="selectin", cascade="all, delete-orphan"
    )
    sessions: Mapped[list["Session"]] = relationship(
        "Session", back_populates="tryout", lazy="selectin", cascade="all, delete-orphan"
    )
    stats: Mapped["TryoutStats"] = relationship(
        "TryoutStats", back_populates="tryout", lazy="selectin", uselist=False
    )

    # Constraints and indexes
    __table_args__ = (
        Index(
            "ix_tryouts_website_id_tryout_id", "website_id", "tryout_id", unique=True
        ),
        CheckConstraint("min_sample_for_dynamic > 0", "ck_min_sample_positive"),
        CheckConstraint("static_rataan > 0", "ck_static_rataan_positive"),
        CheckConstraint("static_sb > 0", "ck_static_sb_positive"),
        CheckConstraint("min_calibration_sample > 0", "ck_min_calibration_positive"),
    )

    def __repr__(self) -> str:
        # FIX: the original returned an empty f-string, which defeats repr's purpose.
        return (
            f"<Tryout id={self.id} website_id={self.website_id} "
            f"tryout_id={self.tryout_id!r}>"
        )
"""
TryoutStats model for tracking tryout-level statistics.

Maintains running statistics for dynamic normalization and reporting.
"""

from datetime import datetime
from typing import Union

from sqlalchemy import (
    CheckConstraint,
    DateTime,
    Float,
    ForeignKey,
    Index,
    Integer,
    String,
    func,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.database import Base


class TryoutStats(Base):
    """
    TryoutStats model for maintaining tryout-level statistics.

    Tracks participant counts, score distributions, and calculated
    normalization parameters (rataan, sb) for dynamic normalization.

    Attributes:
        id: Primary key
        website_id: Website identifier
        tryout_id: Tryout identifier
        participant_count: Number of completed sessions
        total_nm_sum: Running sum of NM scores
        total_nm_sq_sum: Running sum of squared NM scores (for variance calc)
        rataan: Calculated mean of NM scores
        sb: Calculated standard deviation of NM scores
        min_nm: Minimum NM score observed
        max_nm: Maximum NM score observed
        last_calculated: Timestamp of last statistics update
        created_at: Record creation timestamp
        updated_at: Record update timestamp
        tryout: Tryout relationship
    """

    __tablename__ = "tryout_stats"

    # Primary key
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)

    # Foreign keys
    website_id: Mapped[int] = mapped_column(
        ForeignKey("websites.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
        index=True,
        comment="Website identifier",
    )
    tryout_id: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
        index=True,
        comment="Tryout identifier",
    )

    # Running statistics — kept incrementally so mean/sd can be updated
    # per-session without scanning every completed session.
    participant_count: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=0,
        comment="Number of completed sessions",
    )
    total_nm_sum: Mapped[float] = mapped_column(
        Float,
        nullable=False,
        default=0.0,
        comment="Running sum of NM scores",
    )
    total_nm_sq_sum: Mapped[float] = mapped_column(
        Float,
        nullable=False,
        default=0.0,
        comment="Running sum of squared NM scores",
    )

    # Calculated statistics
    rataan: Mapped[Union[float, None]] = mapped_column(
        Float,
        nullable=True,
        comment="Calculated mean of NM scores",
    )
    sb: Mapped[Union[float, None]] = mapped_column(
        Float,
        nullable=True,
        comment="Calculated standard deviation of NM scores",
    )

    # Score range.
    # NOTE(review): stored as Integer while the running sums are Float —
    # presumably NM scores are integral; confirm against the scoring service.
    min_nm: Mapped[Union[int, None]] = mapped_column(
        Integer,
        nullable=True,
        comment="Minimum NM score observed",
    )
    max_nm: Mapped[Union[int, None]] = mapped_column(
        Integer,
        nullable=True,
        comment="Maximum NM score observed",
    )

    # Timestamps.
    # FIX: func.now() instead of the plain string "NOW()" — a string onupdate
    # would store the literal text "NOW()" rather than the current time.
    last_calculated: Mapped[Union[datetime, None]] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        comment="Timestamp of last statistics update",
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Relationships
    tryout: Mapped["Tryout"] = relationship(
        "Tryout", back_populates="stats", lazy="selectin"
    )

    # Constraints and indexes
    __table_args__ = (
        Index(
            "ix_tryout_stats_website_id_tryout_id",
            "website_id",
            "tryout_id",
            unique=True,
        ),
        # Participant count must be non-negative
        CheckConstraint("participant_count >= 0", "ck_participant_count_non_negative"),
        # Min and max NM must be within valid range [0, 1000]
        CheckConstraint(
            "min_nm IS NULL OR (min_nm >= 0 AND min_nm <= 1000)",
            "ck_min_nm_range",
        ),
        CheckConstraint(
            "max_nm IS NULL OR (max_nm >= 0 AND max_nm <= 1000)",
            "ck_max_nm_range",
        ),
        # Min must be less than or equal to max
        CheckConstraint(
            "min_nm IS NULL OR max_nm IS NULL OR min_nm <= max_nm",
            "ck_min_max_nm_order",
        ),
    )

    def __repr__(self) -> str:
        # FIX: the original returned an empty f-string.
        return (
            f"<TryoutStats website_id={self.website_id} "
            f"tryout_id={self.tryout_id!r} participants={self.participant_count}>"
        )
"""
User model for WordPress user integration.

Represents users from WordPress that can take tryouts.
"""

from datetime import datetime

from sqlalchemy import DateTime, ForeignKey, Index, String, func
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.database import Base


class User(Base):
    """
    User model representing WordPress users.

    Attributes:
        id: Primary key
        wp_user_id: WordPress user ID, stored as a string (unique per site)
        website_id: Website identifier (for multi-site support)
        created_at: Record creation timestamp
        updated_at: Record update timestamp
        sessions: User's tryout sessions
    """

    __tablename__ = "users"

    # Primary key
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)

    # WordPress user ID (unique within website context).
    # FIX: annotated Mapped[str] — the column type is String(255) and the
    # sibling UserAnswer.wp_user_id is already Mapped[str]; the original
    # Mapped[int] annotation contradicted the actual column type.
    wp_user_id: Mapped[str] = mapped_column(
        String(255), nullable=False, index=True, comment="WordPress user ID"
    )

    # Website identifier (for multi-site support)
    website_id: Mapped[int] = mapped_column(
        ForeignKey("websites.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
        index=True,
        comment="Website identifier",
    )

    # Timestamps.
    # FIX: func.now() instead of the plain string "NOW()" — a string onupdate
    # would store the literal text "NOW()" rather than the current time.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Relationships
    website: Mapped["Website"] = relationship(
        "Website", back_populates="users", lazy="selectin"
    )
    sessions: Mapped[list["Session"]] = relationship(
        "Session", back_populates="user", lazy="selectin", cascade="all, delete-orphan"
    )

    # Indexes
    __table_args__ = (
        Index("ix_users_wp_user_id_website_id", "wp_user_id", "website_id", unique=True),
        Index("ix_users_website_id", "website_id"),
    )

    def __repr__(self) -> str:
        # FIX: the original returned an empty f-string.
        return (
            f"<User id={self.id} wp_user_id={self.wp_user_id!r} "
            f"website_id={self.website_id}>"
        )
"""
UserAnswer model for tracking individual question responses.

Represents a student's response to a single question with scoring metadata.
"""

from datetime import datetime
from typing import Literal

from sqlalchemy import (
    Boolean,
    CheckConstraint,
    DateTime,
    Float,
    ForeignKey,
    Index,
    Integer,
    String,
    func,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.database import Base


class UserAnswer(Base):
    """
    UserAnswer model representing a student's response to a question.

    Tracks response, correctness, scoring, and timing information.

    Attributes:
        id: Primary key
        session_id: Session identifier
        wp_user_id: WordPress user ID
        website_id: Website identifier
        tryout_id: Tryout identifier
        item_id: Item identifier
        response: User's answer (A, B, C, D)
        is_correct: Whether answer is correct
        time_spent: Time spent on this question (seconds)
        scoring_mode_used: Scoring mode used
        bobot_earned: Weight earned for this answer
        created_at: Record creation timestamp
        updated_at: Record update timestamp
        session: Session relationship
        item: Item relationship
    """

    __tablename__ = "user_answers"

    # Primary key
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)

    # Foreign keys
    session_id: Mapped[str] = mapped_column(
        ForeignKey("sessions.session_id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
        index=True,
        comment="Session identifier",
    )
    wp_user_id: Mapped[str] = mapped_column(
        String(255), nullable=False, index=True, comment="WordPress user ID"
    )
    website_id: Mapped[int] = mapped_column(
        ForeignKey("websites.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
        index=True,
        comment="Website identifier",
    )
    tryout_id: Mapped[str] = mapped_column(
        String(255), nullable=False, index=True, comment="Tryout identifier"
    )
    item_id: Mapped[int] = mapped_column(
        ForeignKey("items.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
        index=True,
        comment="Item identifier",
    )

    # Response information
    response: Mapped[str] = mapped_column(
        String(10), nullable=False, comment="User's answer (A, B, C, D)"
    )
    is_correct: Mapped[bool] = mapped_column(
        Boolean, nullable=False, comment="Whether answer is correct"
    )
    time_spent: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=0,
        comment="Time spent on this question (seconds)",
    )

    # Scoring metadata
    scoring_mode_used: Mapped[Literal["ctt", "irt", "hybrid"]] = mapped_column(
        String(50),
        nullable=False,
        comment="Scoring mode used",
    )
    bobot_earned: Mapped[float] = mapped_column(
        Float,
        nullable=False,
        default=0.0,
        comment="Weight earned for this answer",
    )

    # Timestamps.
    # FIX: func.now() instead of the plain string "NOW()" — a string onupdate
    # would store the literal text "NOW()" rather than the current time.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Relationships
    session: Mapped["Session"] = relationship(
        "Session", back_populates="user_answers", lazy="selectin"
    )
    item: Mapped["Item"] = relationship(
        "Item", back_populates="user_answers", lazy="selectin"
    )

    # Constraints and indexes
    __table_args__ = (
        Index("ix_user_answers_session_id", "session_id"),
        Index("ix_user_answers_wp_user_id", "wp_user_id"),
        Index("ix_user_answers_website_id", "website_id"),
        Index("ix_user_answers_tryout_id", "tryout_id"),
        Index("ix_user_answers_item_id", "item_id"),
        # One answer per item per session
        Index(
            "ix_user_answers_session_id_item_id",
            "session_id",
            "item_id",
            unique=True,
        ),
        # Time spent must be non-negative
        CheckConstraint("time_spent >= 0", "ck_time_spent_non_negative"),
        # Bobot earned must be non-negative
        CheckConstraint("bobot_earned >= 0", "ck_bobot_earned_non_negative"),
    )

    def __repr__(self) -> str:
        # FIX: the original returned an empty f-string.
        return (
            f"<UserAnswer session_id={self.session_id!r} item_id={self.item_id} "
            f"is_correct={self.is_correct}>"
        )
"""
Website model for multi-site support.

Represents WordPress websites that use the IRT Bank Soal system.
"""

from datetime import datetime

from sqlalchemy import DateTime, String, func
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.database import Base


class Website(Base):
    """
    Website model representing WordPress sites.

    Enables multi-site support where a single backend serves multiple
    WordPress-powered educational sites.

    Attributes:
        id: Primary key
        site_url: WordPress site URL
        site_name: Human-readable site name
        created_at: Record creation timestamp
        updated_at: Record update timestamp
        users: Users belonging to this website
        tryouts: Tryouts available on this website
    """

    __tablename__ = "websites"

    # Primary key
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)

    # Site information
    site_url: Mapped[str] = mapped_column(
        String(512),
        nullable=False,
        unique=True,
        index=True,
        comment="WordPress site URL",
    )
    site_name: Mapped[str] = mapped_column(
        String(255), nullable=False, comment="Human-readable site name"
    )

    # Timestamps.
    # FIX: func.now() instead of the plain string "NOW()" — a string onupdate
    # would store the literal text "NOW()" rather than the current time.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Relationships
    users: Mapped[list["User"]] = relationship(
        "User", back_populates="website", lazy="selectin", cascade="all, delete-orphan"
    )
    tryouts: Mapped[list["Tryout"]] = relationship(
        "Tryout", back_populates="website", lazy="selectin", cascade="all, delete-orphan"
    )

    def __repr__(self) -> str:
        # FIX: the original returned an empty f-string.
        return f"<Website id={self.id} site_url={self.site_url!r}>"
"""
Admin API router for custom admin actions.

Provides admin-specific endpoints for triggering calibration,
toggling AI generation, and resetting normalization.
"""

from typing import Dict, Optional

from fastapi import APIRouter, Depends, Header, HTTPException, status
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.config import get_settings
from app.database import get_db
from app.models import Tryout, TryoutStats
from app.services.irt_calibration import (
    calibrate_all,
    CALIBRATION_SAMPLE_THRESHOLD,
)

router = APIRouter(prefix="/admin", tags=["admin"])
settings = get_settings()


def get_admin_website_id(
    x_website_id: Optional[str] = Header(None, alias="X-Website-ID"),
) -> int:
    """
    Extract and validate website_id from request header for admin operations.

    Args:
        x_website_id: Raw X-Website-ID header value, or None when absent.

    Returns:
        Validated website ID as integer.

    Raises:
        HTTPException: 400 if the header is missing or not an integer.
    """
    if x_website_id is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="X-Website-ID header is required",
        )
    try:
        return int(x_website_id)
    except ValueError:
        # FIX: "from None" suppresses the implicit ValueError chaining — the
        # internal parse error adds nothing for the client and only clutters
        # server tracebacks with "During handling of the above exception...".
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="X-Website-ID must be a valid integer",
        ) from None
@router.post(
    "/{tryout_id}/calibrate",
    summary="Trigger IRT calibration",
    description="Trigger IRT calibration for all items in this tryout with sufficient response data.",
)
async def admin_trigger_calibration(
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_admin_website_id),
) -> Dict[str, object]:
    """
    Trigger IRT calibration for all items in a tryout.

    Runs calibration for items with >= min_calibration_sample responses.
    Updates item.irt_b, item.irt_se, and item.calibrated status.

    Args:
        tryout_id: Tryout identifier
        db: Database session
        website_id: Website ID from header

    Returns:
        Calibration results summary

    Raises:
        HTTPException: If tryout not found or calibration fails
    """
    # FIX (annotation): the original return type was Dict[str, any], which
    # uses the builtin any() function as a type; object is the correct
    # "anything" value type here.

    # Verify tryout exists
    tryout_result = await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    tryout = tryout_result.scalar_one_or_none()

    if tryout is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tryout {tryout_id} not found for website {website_id}",
        )

    # Run calibration; fall back to the service-level threshold when the
    # tryout does not define its own minimum sample size.
    result = await calibrate_all(
        tryout_id=tryout_id,
        website_id=website_id,
        db=db,
        min_sample_size=tryout.min_calibration_sample or CALIBRATION_SAMPLE_THRESHOLD,
    )

    return {
        "tryout_id": tryout_id,
        "total_items": result.total_items,
        "calibrated_items": result.calibrated_items,
        "failed_items": result.failed_items,
        # calibration_percentage is a fraction in [0, 1]; report as percent
        "calibration_percentage": round(result.calibration_percentage * 100, 2),
        "ready_for_irt": result.ready_for_irt,
        "message": f"Calibration complete: {result.calibrated_items}/{result.total_items} items calibrated",
    }
@router.post(
    "/{tryout_id}/toggle-ai-generation",
    summary="Toggle AI generation",
    description="Toggle AI question generation for a tryout.",
)
async def admin_toggle_ai_generation(
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_admin_website_id),
) -> Dict[str, object]:
    """
    Toggle AI generation for a tryout.

    Updates Tryout.ai_generation_enabled field.

    Args:
        tryout_id: Tryout identifier
        db: Database session
        website_id: Website ID from header

    Returns:
        Updated AI generation status

    Raises:
        HTTPException: If tryout not found
    """
    # Get tryout
    result = await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    tryout = result.scalar_one_or_none()

    if tryout is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tryout {tryout_id} not found for website {website_id}",
        )

    # Toggle AI generation
    tryout.ai_generation_enabled = not tryout.ai_generation_enabled
    await db.commit()
    await db.refresh(tryout)

    # FIX: renamed local from "status" — the original shadowed the imported
    # fastapi.status module within this function.
    state_label = "enabled" if tryout.ai_generation_enabled else "disabled"
    return {
        "tryout_id": tryout_id,
        "ai_generation_enabled": tryout.ai_generation_enabled,
        "message": f"AI generation {state_label} for tryout {tryout_id}",
    }
@router.post(
    "/{tryout_id}/reset-normalization",
    summary="Reset normalization",
    description="Reset normalization to static values and clear incremental stats.",
)
async def admin_reset_normalization(
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_admin_website_id),
) -> Dict[str, object]:
    """
    Reset normalization for a tryout.

    Resets rataan, sb to the tryout's static values (or to the 500/100
    defaults when the tryout row is missing) and clears incremental stats.

    Args:
        tryout_id: Tryout identifier
        db: Database session
        website_id: Website ID from header

    Returns:
        Reset statistics

    Raises:
        HTTPException: If tryout stats not found
    """
    # FIX (annotation): Dict[str, any] -> Dict[str, object]; "any" is the
    # builtin function, not a type.

    # Get tryout stats
    stats_result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    stats = stats_result.scalar_one_or_none()

    if stats is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"TryoutStats for {tryout_id} not found for website {website_id}",
        )

    # Get tryout for static values
    tryout_result = await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    tryout = tryout_result.scalar_one_or_none()

    if tryout:
        # Reset to static values
        stats.rataan = tryout.static_rataan
        stats.sb = tryout.static_sb
    else:
        # Stats exist without a tryout row: fall back to the model defaults
        stats.rataan = 500.0
        stats.sb = 100.0

    # Clear incremental stats
    old_participant_count = stats.participant_count
    stats.participant_count = 0
    stats.total_nm_sum = 0.0
    stats.total_nm_sq_sum = 0.0
    stats.min_nm = None
    stats.max_nm = None
    stats.last_calculated = None

    await db.commit()
    await db.refresh(stats)

    return {
        "tryout_id": tryout_id,
        "rataan": stats.rataan,
        "sb": stats.sb,
        "cleared_stats": {
            "previous_participant_count": old_participant_count,
        },
        "message": f"Normalization reset to static values (rataan={stats.rataan}, sb={stats.sb}). Incremental stats cleared.",
    }
"""
AI Generation Router.

Admin endpoints for AI question generation playground.
"""

import logging
from typing import Annotated

from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import and_, select
from sqlalchemy.ext.asyncio import AsyncSession

from app.database import get_db
from app.models.item import Item
from app.schemas.ai import (
    AIGeneratePreviewRequest,
    AIGeneratePreviewResponse,
    AISaveRequest,
    AISaveResponse,
    AIStatsResponse,
)
from app.services.ai_generation import (
    generate_question,
    get_ai_stats,
    save_ai_question,
    validate_ai_model,
)

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/admin/ai", tags=["admin", "ai-generation"])


@router.post(
    "/generate-preview",
    response_model=AIGeneratePreviewResponse,
    summary="Preview AI-generated question",
    description="""
    Generate a question preview using AI without saving to database.

    This is an admin playground endpoint for testing AI generation quality.
    Admins can retry unlimited times until satisfied with the result.

    Requirements:
    - basis_item_id must reference an existing item at 'sedang' level
    - target_level must be 'mudah' or 'sulit'
    - ai_model must be a supported OpenRouter model
    """,
    responses={
        200: {"description": "Question generated successfully (preview mode)"},
        400: {"description": "Invalid request (wrong level, unsupported model)"},
        404: {"description": "Basis item not found"},
        500: {"description": "AI generation failed"},
    },
)
async def generate_preview(
    request: AIGeneratePreviewRequest,
    db: Annotated[AsyncSession, Depends(get_db)],
) -> AIGeneratePreviewResponse:
    """
    Generate AI question preview (no database save).

    - **basis_item_id**: ID of the sedang-level question to base generation on
    - **target_level**: Target difficulty (mudah/sulit)
    - **ai_model**: OpenRouter model to use (default: qwen/qwen-2.5-coder-32b-instruct)
    """
    # Validate AI model
    if not validate_ai_model(request.ai_model):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Unsupported AI model: {request.ai_model}. "
            f"Supported models: qwen/qwen-2.5-coder-32b-instruct, meta-llama/llama-3.3-70b-instruct",
        )

    # Fetch basis item
    result = await db.execute(
        select(Item).where(Item.id == request.basis_item_id)
    )
    basis_item = result.scalar_one_or_none()

    if not basis_item:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Basis item not found: {request.basis_item_id}",
        )

    # Validate basis item is sedang level
    if basis_item.level != "sedang":
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Basis item must be 'sedang' level, got: {basis_item.level}",
        )

    # Generate question; AI failures are reported in the response body
    # (success=False) rather than as HTTP errors, so the admin playground
    # can display them and allow retries.
    try:
        generated = await generate_question(
            basis_item=basis_item,
            target_level=request.target_level,
            ai_model=request.ai_model,
        )

        if not generated:
            return AIGeneratePreviewResponse(
                success=False,
                error="AI generation failed. Please check logs or try again.",
                ai_model=request.ai_model,
                basis_item_id=request.basis_item_id,
                target_level=request.target_level,
            )

        return AIGeneratePreviewResponse(
            success=True,
            stem=generated.stem,
            options=generated.options,
            correct=generated.correct,
            explanation=generated.explanation,
            ai_model=request.ai_model,
            basis_item_id=request.basis_item_id,
            target_level=request.target_level,
            cached=False,
        )

    except Exception as e:
        # FIX: logger.exception records the traceback (logger.error with an
        # f-string did not), and lazy %s args avoid formatting when the
        # log level is disabled.
        logger.exception("AI preview generation failed: %s", e)
        return AIGeneratePreviewResponse(
            success=False,
            error=f"AI generation error: {str(e)}",
            ai_model=request.ai_model,
            basis_item_id=request.basis_item_id,
            target_level=request.target_level,
        )
@router.post(
    "/generate-save",
    response_model=AISaveResponse,
    summary="Save AI-generated question",
    description="""
    Save an AI-generated question to the database.

    This endpoint creates a new Item record with:
    - generated_by='ai'
    - ai_model from request
    - basis_item_id linking to original question
    - calibrated=False (will be calculated later)
    """,
    responses={
        200: {"description": "Question saved successfully"},
        400: {"description": "Invalid request data"},
        404: {"description": "Basis item or tryout not found"},
        409: {"description": "Item already exists at this slot/level"},
        500: {"description": "Database save failed"},
    },
)
async def generate_save(
    request: AISaveRequest,
    db: Annotated[AsyncSession, Depends(get_db)],
) -> AISaveResponse:
    """
    Save AI-generated question to database.

    - **stem**: Question text
    - **options**: Dict with A, B, C, D options
    - **correct**: Correct answer (A/B/C/D)
    - **explanation**: Answer explanation (optional)
    - **tryout_id**: Tryout identifier
    - **website_id**: Website identifier
    - **basis_item_id**: Original item ID this was generated from
    - **slot**: Question slot position
    - **level**: Difficulty level
    - **ai_model**: AI model used for generation
    """
    # Imported locally, mirroring the original implementation.
    from app.schemas.ai import GeneratedQuestion

    # The basis item must exist before anything is written.
    basis_lookup = await db.execute(select(Item).where(Item.id == request.basis_item_id))
    if basis_lookup.scalar_one_or_none() is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Basis item not found: {request.basis_item_id}",
        )

    # Reject a second item occupying the same (tryout, website, slot, level).
    duplicate_filter = and_(
        Item.tryout_id == request.tryout_id,
        Item.website_id == request.website_id,
        Item.slot == request.slot,
        Item.level == request.level,
    )
    duplicate_lookup = await db.execute(select(Item).where(duplicate_filter))
    if duplicate_lookup.scalar_one_or_none() is not None:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Item already exists at slot={request.slot}, level={request.level} "
            f"for tryout={request.tryout_id}",
        )

    # Repackage the validated payload and persist it via the service layer.
    question_payload = GeneratedQuestion(
        stem=request.stem,
        options=request.options,
        correct=request.correct,
        explanation=request.explanation,
    )
    new_item_id = await save_ai_question(
        generated_data=question_payload,
        tryout_id=request.tryout_id,
        website_id=request.website_id,
        basis_item_id=request.basis_item_id,
        slot=request.slot,
        level=request.level,
        ai_model=request.ai_model,
        db=db,
    )

    if not new_item_id:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to save AI-generated question",
        )

    return AISaveResponse(
        success=True,
        item_id=new_item_id,
    )
@router.get(
    "/stats",
    response_model=AIStatsResponse,
    summary="Get AI generation statistics",
    description="""
    Get statistics about AI-generated questions.

    Returns:
    - Total AI-generated items count
    - Items count by model
    - Cache hit rate (placeholder)
    """,
)
async def get_stats(
    db: Annotated[AsyncSession, Depends(get_db)],
) -> AIStatsResponse:
    """
    Get AI generation statistics.
    """
    raw = await get_ai_stats(db)
    # Map the service dict onto the response schema field by field.
    field_names = (
        "total_ai_items",
        "items_by_model",
        "cache_hit_rate",
        "total_cache_hits",
        "total_requests",
    )
    return AIStatsResponse(**{name: raw[name] for name in field_names})
"""
Import/Export API router for Excel question migration.

Endpoints:
- POST /api/v1/import/preview: Preview Excel import without saving
- POST /api/v1/import/questions: Import questions from Excel to database
- GET /api/v1/export/questions: Export questions to Excel file
"""

import os
import tempfile
from typing import Optional

from fastapi import APIRouter, Depends, File, Form, Header, HTTPException, UploadFile, status
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession

from app.database import get_db
from app.services.excel_import import (
    bulk_insert_items,
    export_questions_to_excel,
    parse_excel_import,
    validate_excel_structure,
)

router = APIRouter(prefix="/api/v1/import-export", tags=["import-export"])


def get_website_id_from_header(
    x_website_id: Optional[str] = Header(None, alias="X-Website-ID"),
) -> int:
    """
    Extract and validate website_id from request header.

    Args:
        x_website_id: Raw X-Website-ID header value, or None when absent.

    Returns:
        Validated website ID as integer.

    Raises:
        HTTPException: 400 if the header is missing or not an integer.
    """
    if x_website_id is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="X-Website-ID header is required",
        )
    try:
        return int(x_website_id)
    except ValueError:
        # FIX: "from None" suppresses the implicit ValueError chaining,
        # matching the admin router's header dependency.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="X-Website-ID must be a valid integer",
        ) from None
+ + Args: + file: Excel file upload (.xlsx format) + website_id: Website ID from header + + Returns: + Dict with: + - items_count: Number of items parsed + - preview: List of item previews + - validation_errors: List of validation errors if any + + Raises: + HTTPException: If file format is invalid or parsing fails + """ + # Validate file format + if not file.filename or not file.filename.lower().endswith('.xlsx'): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="File must be .xlsx format", + ) + + # Save uploaded file to temporary location + try: + with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file: + content = await file.read() + temp_file.write(content) + temp_file_path = temp_file.name + except Exception as e: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Failed to save uploaded file: {str(e)}", + ) + + try: + # Validate Excel structure + validation = validate_excel_structure(temp_file_path) + if not validation["valid"]: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={ + "error": "Invalid Excel structure", + "validation_errors": validation["errors"], + }, + ) + + # Parse Excel (tryout_id is optional for preview) + tryout_id = "preview" # Use dummy tryout_id for preview + result = parse_excel_import( + temp_file_path, + website_id=website_id, + tryout_id=tryout_id + ) + + if result["validation_errors"]: + return { + "items_count": result["items_count"], + "preview": result["items"], + "validation_errors": result["validation_errors"], + "has_errors": True, + } + + # Return limited preview (first 5 items) + preview_items = result["items"][:5] + + return { + "items_count": result["items_count"], + "preview": preview_items, + "validation_errors": [], + "has_errors": False, + } + + finally: + # Clean up temporary file + if os.path.exists(temp_file_path): + os.unlink(temp_file_path) + + +@router.post( + "/questions", + summary="Import questions from 
@router.post(
    "/questions",
    summary="Import questions from Excel",
    description="Parse Excel file and import questions to database with 100% data integrity.",
)
async def import_questions(
    file: UploadFile = File(..., description="Excel file (.xlsx)"),
    website_id: int = Depends(get_website_id_from_header),
    tryout_id: str = Form(..., description="Tryout identifier"),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """
    Import questions from Excel to database.

    Validates file format, parses Excel content, checks for duplicates,
    and performs bulk insert with rollback on error.

    Args:
        file: Excel file upload (.xlsx format)
        website_id: Website ID from header
        tryout_id: Tryout identifier
        db: Async database session

    Returns:
        Dict with:
        - message: Status message
        - imported: Number of items successfully imported
        - duplicates: Number of duplicate items skipped

    Raises:
        HTTPException: If file format is invalid, validation fails, or import
            fails; 409 when duplicates were skipped (partial success).
    """
    # Only .xlsx is supported by the parser.
    if not file.filename or not file.filename.lower().endswith('.xlsx'):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="File must be .xlsx format",
        )

    # Save upload to a temp file; unlink on failure so failed uploads do not
    # leak temp files (fix over the original, which left the file behind).
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
    try:
        tmp.write(await file.read())
    except Exception as e:
        tmp.close()
        os.unlink(tmp.name)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to save uploaded file: {str(e)}",
        ) from e
    tmp.close()
    temp_file_path = tmp.name

    try:
        # Structural validation before full parse.
        validation = validate_excel_structure(temp_file_path)
        if not validation["valid"]:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "error": "Invalid Excel structure",
                    "validation_errors": validation["errors"],
                },
            )

        result = parse_excel_import(
            temp_file_path,
            website_id=website_id,
            tryout_id=tryout_id,
        )

        if result["validation_errors"]:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "error": "Validation failed",
                    "validation_errors": result["validation_errors"],
                },
            )

        if result["items_count"] == 0:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="No items found in Excel file",
            )

        insert_result = await bulk_insert_items(result["items"], db)

        if insert_result["errors"]:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "error": "Import failed",
                    "errors": insert_result["errors"],
                },
            )

        # Duplicates are reported as 409 even though non-duplicate rows were
        # inserted — the detail payload carries the partial-success counts.
        if insert_result["duplicate_count"] > 0:
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail={
                    "message": f"Import completed with {insert_result['duplicate_count']} duplicate(s) skipped",
                    "imported": insert_result["inserted_count"],
                    "duplicates": insert_result["duplicate_count"],
                },
            )

        return {
            "message": "Import successful",
            "imported": insert_result["inserted_count"],
            "duplicates": insert_result["duplicate_count"],
        }

    finally:
        # Always clean up the temporary upload.
        if os.path.exists(temp_file_path):
            os.unlink(temp_file_path)


@router.get(
    "/export/questions",
    summary="Export questions to Excel",
    description="Export questions for a tryout to Excel file in standardized format.",
)
async def export_questions(
    tryout_id: str,
    website_id: int = Depends(get_website_id_from_header),
    db: AsyncSession = Depends(get_db),
) -> FileResponse:
    """
    Export questions to Excel file.

    Creates Excel file with standardized format:
    - Row 2: KUNCI (answer key)
    - Row 4: TK (p-values)
    - Row 5: BOBOT (weights)
    - Rows 6+: Question data

    Args:
        tryout_id: Tryout identifier
        website_id: Website ID from header
        db: Async database session

    Returns:
        FileResponse with Excel file

    Raises:
        HTTPException: 404 if tryout has no questions, 500 on export failure
    """
    try:
        output_path = await export_questions_to_excel(
            tryout_id=tryout_id,
            website_id=website_id,
            db=db,
        )

        # NOTE(review): output_path is never deleted after the response is
        # sent — exported files accumulate on disk. Consider passing a
        # starlette BackgroundTask to FileResponse to unlink it; left as-is
        # here to avoid a new dependency on response internals.
        return FileResponse(
            path=output_path,
            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            filename=f"tryout_{tryout_id}_questions.xlsx",
        )

    except ValueError as e:
        # The service signals "no questions for this tryout" via ValueError.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e),
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Export failed: {str(e)}",
        ) from e
"""
Normalization API router for dynamic normalization management.

Endpoints:
- GET /tryout/{tryout_id}/normalization: Get normalization configuration
- PUT /tryout/{tryout_id}/normalization: Update normalization settings
- POST /tryout/{tryout_id}/normalization/reset: Reset normalization stats
- GET /tryout/{tryout_id}/normalization/validate: Validate dynamic normalization
"""

from typing import Optional

from fastapi import APIRouter, Depends, HTTPException, Header, status
from sqlalchemy.ext.asyncio import AsyncSession

from app.database import get_db
from app.services.config_management import (
    get_normalization_config,
    reset_normalization_stats,
    toggle_normalization_mode,
    update_config,
)
from app.services.normalization import (
    validate_dynamic_normalization,
)

router = APIRouter(prefix="/tryout", tags=["normalization"])


def get_website_id_from_header(
    x_website_id: Optional[str] = Header(None, alias="X-Website-ID"),
) -> int:
    """Resolve the tenant website id from the ``X-Website-ID`` header.

    Returns:
        The website id as an integer.

    Raises:
        HTTPException: 400 when the header is absent or not an integer.
    """
    if x_website_id is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="X-Website-ID header is required",
        )
    try:
        return int(x_website_id)
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="X-Website-ID must be a valid integer",
        )


@router.get(
    "/{tryout_id}/normalization",
    summary="Get normalization configuration",
    description="Retrieve current normalization configuration including mode, static values, dynamic values, and threshold status.",
)
async def get_normalization_endpoint(
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
):
    """Fetch the normalization configuration for one tryout.

    The payload includes the active mode (static/dynamic/hybrid), current
    rataan/sb, static fallback values, participant count, and whether the
    dynamic-mode threshold has been reached.

    Raises:
        HTTPException: 404 when the tryout does not exist.
    """
    try:
        return await get_normalization_config(db, website_id, tryout_id)
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(exc),
        )


@router.put(
    "/{tryout_id}/normalization",
    summary="Update normalization settings",
    description="Update normalization mode and static values for a tryout.",
)
async def update_normalization_endpoint(
    tryout_id: str,
    normalization_mode: Optional[str] = None,
    static_rataan: Optional[float] = None,
    static_sb: Optional[float] = None,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
):
    """Apply partial updates to a tryout's normalization settings.

    Only the parameters actually supplied are changed; supplying none at
    all is a 400. Returns the refreshed configuration.

    Raises:
        HTTPException: 400 on invalid values or an empty update,
            404 when the tryout does not exist.
    """
    changes: dict = {}

    if normalization_mode is not None:
        if normalization_mode not in ("static", "dynamic", "hybrid"):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Invalid normalization_mode: {normalization_mode}. Must be 'static', 'dynamic', or 'hybrid'",
            )
        changes["normalization_mode"] = normalization_mode

    if static_rataan is not None:
        if static_rataan <= 0:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="static_rataan must be greater than 0",
            )
        changes["static_rataan"] = static_rataan

    if static_sb is not None:
        if static_sb <= 0:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="static_sb must be greater than 0",
            )
        changes["static_sb"] = static_sb

    if not changes:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="No updates provided",
        )

    try:
        await update_config(db, website_id, tryout_id, changes)
        # Re-read so the response reflects the persisted state.
        return await get_normalization_config(db, website_id, tryout_id)
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(exc),
        )


@router.post(
    "/{tryout_id}/normalization/reset",
    summary="Reset normalization stats",
    description="Reset TryoutStats to initial values and switch to static normalization mode.",
)
async def reset_normalization_endpoint(
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
):
    """Reset a tryout's running normalization statistics.

    Clears participant count and accumulated sums, and the service flips
    the tryout back to static normalization until enough data accrues.

    Raises:
        HTTPException: 404 when the tryout does not exist.
    """
    try:
        stats = await reset_normalization_stats(db, website_id, tryout_id)
        config = await get_normalization_config(db, website_id, tryout_id)
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(exc),
        )

    return {
        "message": "Normalization stats reset successfully",
        "tryout_id": tryout_id,
        "participant_count": stats.participant_count,
        "normalization_mode": config["normalization_mode"],
    }


@router.get(
    "/{tryout_id}/normalization/validate",
    summary="Validate dynamic normalization",
    description="Validate that dynamic normalization produces expected distribution (mean≈500±5, SD≈100±5).",
)
async def validate_normalization_endpoint(
    tryout_id: str,
    target_mean: float = 500.0,
    target_sd: float = 100.0,
    mean_tolerance: float = 5.0,
    sd_tolerance: float = 5.0,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
):
    """Check whether dynamic normalization is producing the target distribution.

    Compares the tryout's calculated rataan/sb against the requested targets
    within the given tolerances and echoes the parameters back alongside the
    service's detailed findings.

    Raises:
        HTTPException: 404 when the tryout does not exist.
    """
    try:
        is_valid, details = await validate_dynamic_normalization(
            db=db,
            website_id=website_id,
            tryout_id=tryout_id,
            target_mean=target_mean,
            target_sd=target_sd,
            mean_tolerance=mean_tolerance,
            sd_tolerance=sd_tolerance,
        )
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(exc),
        )

    return {
        "tryout_id": tryout_id,
        "is_valid": is_valid,
        "target_mean": target_mean,
        "target_sd": target_sd,
        "mean_tolerance": mean_tolerance,
        "sd_tolerance": sd_tolerance,
        "details": details,
    }
"""
Reports API router for comprehensive reporting.

Endpoints:
- GET /reports/student/performance: Get student performance report
- GET /reports/items/analysis: Get item analysis report
- GET /reports/calibration/status: Get calibration status report
- GET /reports/tryout/comparison: Get tryout comparison report
- POST /reports/schedule: Schedule a report
- GET /reports/export/{schedule_id}/{format}: Export scheduled report
"""

import os
from datetime import datetime
from typing import List, Literal, Optional

from fastapi import APIRouter, Depends, HTTPException, Header, status
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession

from app.database import get_db
from app.schemas.report import (
    StudentPerformanceReportOutput,
    AggregatePerformanceStatsOutput,
    StudentPerformanceRecordOutput,
    ItemAnalysisReportOutput,
    ItemAnalysisRecordOutput,
    CalibrationStatusReportOutput,
    CalibrationItemStatusOutput,
    TryoutComparisonReportOutput,
    TryoutComparisonRecordOutput,
    ReportScheduleRequest,
    ReportScheduleOutput,
    ReportScheduleResponse,
    ExportResponse,
)
from app.services.reporting import (
    generate_student_performance_report,
    generate_item_analysis_report,
    generate_calibration_status_report,
    generate_tryout_comparison_report,
    export_report_to_csv,
    export_report_to_excel,
    export_report_to_pdf,
    schedule_report,
    get_scheduled_report,
    list_scheduled_reports,
    cancel_scheduled_report,
    StudentPerformanceReport,
    ItemAnalysisReport,
    CalibrationStatusReport,
    TryoutComparisonReport,
)

router = APIRouter(prefix="/reports", tags=["reports"])


def get_website_id_from_header(
    x_website_id: Optional[str] = Header(None, alias="X-Website-ID"),
) -> int:
    """Resolve the tenant website id from the ``X-Website-ID`` header.

    Returns:
        The website id as an integer.

    Raises:
        HTTPException: 400 when the header is absent or not an integer.
    """
    if x_website_id is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="X-Website-ID header is required",
        )
    try:
        return int(x_website_id)
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="X-Website-ID must be a valid integer",
        )


# =============================================================================
# Student Performance Report Endpoints
# =============================================================================

@router.get(
    "/student/performance",
    response_model=StudentPerformanceReportOutput,
    summary="Get student performance report",
    description="Generate student performance report with individual and aggregate statistics.",
)
async def get_student_performance_report(
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
    date_start: Optional[datetime] = None,
    date_end: Optional[datetime] = None,
    format_type: Literal["individual", "aggregate", "both"] = "both",
) -> StudentPerformanceReportOutput:
    """Build the student performance report for one tryout.

    ``format_type`` selects individual records, aggregate statistics, or
    both; an optional date window restricts which sessions are included.
    """
    # Only build a date filter when at least one bound was supplied,
    # and only include bounds that are actually set.
    date_range = None
    if date_start or date_end:
        date_range = {
            key: value
            for key, value in (("start", date_start), ("end", date_end))
            if value
        }

    report = await generate_student_performance_report(
        tryout_id=tryout_id,
        website_id=website_id,
        db=db,
        date_range=date_range,
        format_type=format_type,
    )

    return _convert_student_performance_report(report)


def _to_performance_record(rec) -> StudentPerformanceRecordOutput:
    """Map one dataclass performance record onto its API schema."""
    return StudentPerformanceRecordOutput(
        session_id=rec.session_id,
        wp_user_id=rec.wp_user_id,
        tryout_id=rec.tryout_id,
        NM=rec.NM,
        NN=rec.NN,
        theta=rec.theta,
        theta_se=rec.theta_se,
        total_benar=rec.total_benar,
        time_spent=rec.time_spent,
        start_time=rec.start_time,
        end_time=rec.end_time,
        scoring_mode_used=rec.scoring_mode_used,
        rataan_used=rec.rataan_used,
        sb_used=rec.sb_used,
    )


def _convert_student_performance_report(report: StudentPerformanceReport) -> StudentPerformanceReportOutput:
    """Convert dataclass report to Pydantic output."""
    # Datetimes are serialized to ISO-8601 strings; absent bounds are dropped.
    date_range_str = None
    if report.date_range:
        date_range_str = {
            key: report.date_range[key].isoformat()
            for key in ("start", "end")
            if report.date_range.get(key)
        }

    agg = report.aggregate
    return StudentPerformanceReportOutput(
        generated_at=report.generated_at,
        tryout_id=report.tryout_id,
        website_id=report.website_id,
        date_range=date_range_str,
        aggregate=AggregatePerformanceStatsOutput(
            tryout_id=agg.tryout_id,
            participant_count=agg.participant_count,
            avg_nm=agg.avg_nm,
            std_nm=agg.std_nm,
            min_nm=agg.min_nm,
            max_nm=agg.max_nm,
            median_nm=agg.median_nm,
            avg_nn=agg.avg_nn,
            std_nn=agg.std_nn,
            avg_theta=agg.avg_theta,
            pass_rate=agg.pass_rate,
            avg_time_spent=agg.avg_time_spent,
        ),
        individual_records=[_to_performance_record(r) for r in report.individual_records],
    )
# =============================================================================
# Item Analysis Report Endpoints
# =============================================================================

def _to_item_record(rec) -> ItemAnalysisRecordOutput:
    """Map one dataclass item-analysis record onto its API schema."""
    return ItemAnalysisRecordOutput(
        item_id=rec.item_id,
        slot=rec.slot,
        level=rec.level,
        ctt_p=rec.ctt_p,
        ctt_bobot=rec.ctt_bobot,
        ctt_category=rec.ctt_category,
        irt_b=rec.irt_b,
        irt_se=rec.irt_se,
        calibrated=rec.calibrated,
        calibration_sample_size=rec.calibration_sample_size,
        correctness_rate=rec.correctness_rate,
        item_total_correlation=rec.item_total_correlation,
        information_values=rec.information_values,
        optimal_theta_range=rec.optimal_theta_range,
    )


@router.get(
    "/items/analysis",
    response_model=ItemAnalysisReportOutput,
    summary="Get item analysis report",
    description="Generate item analysis report with difficulty, discrimination, and information functions.",
)
async def get_item_analysis_report(
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
    filter_by: Optional[Literal["difficulty", "calibrated", "discrimination"]] = None,
    difficulty_level: Optional[Literal["mudah", "sedang", "sulit"]] = None,
) -> ItemAnalysisReportOutput:
    """Build the item analysis report for one tryout.

    Optional filters narrow the items by difficulty band, calibration
    status, or discrimination.
    """
    report = await generate_item_analysis_report(
        tryout_id=tryout_id,
        website_id=website_id,
        db=db,
        filter_by=filter_by,
        difficulty_level=difficulty_level,
    )

    return ItemAnalysisReportOutput(
        generated_at=report.generated_at,
        tryout_id=report.tryout_id,
        website_id=report.website_id,
        total_items=report.total_items,
        items=[_to_item_record(r) for r in report.items],
        summary=report.summary,
    )


# =============================================================================
# Calibration Status Report Endpoints
# =============================================================================

def _to_calibration_item(rec) -> CalibrationItemStatusOutput:
    """Map one dataclass calibration record onto its API schema."""
    return CalibrationItemStatusOutput(
        item_id=rec.item_id,
        slot=rec.slot,
        level=rec.level,
        sample_size=rec.sample_size,
        calibrated=rec.calibrated,
        irt_b=rec.irt_b,
        irt_se=rec.irt_se,
        ctt_p=rec.ctt_p,
    )


@router.get(
    "/calibration/status",
    response_model=CalibrationStatusReportOutput,
    summary="Get calibration status report",
    description="Generate calibration status report with progress tracking and readiness metrics.",
)
async def get_calibration_status_report(
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
) -> CalibrationStatusReportOutput:
    """Build the calibration status report for one tryout.

    Reports overall progress, the items still awaiting calibration, and
    whether the tryout is ready for IRT rollout.
    """
    report = await generate_calibration_status_report(
        tryout_id=tryout_id,
        website_id=website_id,
        db=db,
    )

    return CalibrationStatusReportOutput(
        generated_at=report.generated_at,
        tryout_id=report.tryout_id,
        website_id=report.website_id,
        total_items=report.total_items,
        calibrated_items=report.calibrated_items,
        calibration_percentage=report.calibration_percentage,
        items_awaiting_calibration=[
            _to_calibration_item(r) for r in report.items_awaiting_calibration
        ],
        avg_calibration_sample_size=report.avg_calibration_sample_size,
        estimated_time_to_90_percent=report.estimated_time_to_90_percent,
        ready_for_irt_rollout=report.ready_for_irt_rollout,
        items=[_to_calibration_item(r) for r in report.items],
    )


# =============================================================================
# Tryout Comparison Report Endpoints
# =============================================================================

@router.get(
    "/tryout/comparison",
    response_model=TryoutComparisonReportOutput,
    summary="Get tryout comparison report",
    description="Generate tryout comparison report across dates or subjects.",
)
async def get_tryout_comparison_report(
    tryout_ids: str,  # Comma-separated list
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
    group_by: Literal["date", "subject"] = "date",
) -> TryoutComparisonReportOutput:
    """Compare two or more tryouts, grouped by date or by subject.

    ``tryout_ids`` is a comma-separated list; fewer than two ids is a 400.
    """
    requested_ids = [tid.strip() for tid in tryout_ids.split(",")]

    if len(requested_ids) < 2:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="At least 2 tryout IDs are required for comparison",
        )

    report = await generate_tryout_comparison_report(
        tryout_ids=requested_ids,
        website_id=website_id,
        db=db,
        group_by=group_by,
    )

    return TryoutComparisonReportOutput(
        generated_at=report.generated_at,
        comparison_type=report.comparison_type,
        tryouts=[
            TryoutComparisonRecordOutput(
                tryout_id=r.tryout_id,
                date=r.date,
                subject=r.subject,
                participant_count=r.participant_count,
                avg_nm=r.avg_nm,
                avg_nn=r.avg_nn,
                avg_theta=r.avg_theta,
                std_nm=r.std_nm,
                calibration_percentage=r.calibration_percentage,
            )
            for r in report.tryouts
        ],
        trends=report.trends,
        normalization_impact=report.normalization_impact,
    )


# =============================================================================
# Report Scheduling Endpoints
# =============================================================================

def _to_schedule_output(s) -> ReportScheduleOutput:
    """Map an in-memory schedule record onto its API schema."""
    return ReportScheduleOutput(
        schedule_id=s.schedule_id,
        report_type=s.report_type,
        schedule=s.schedule,
        tryout_ids=s.tryout_ids,
        website_id=s.website_id,
        recipients=s.recipients,
        format=s.format,
        created_at=s.created_at,
        last_run=s.last_run,
        next_run=s.next_run,
        is_active=s.is_active,
    )


@router.post(
    "/schedule",
    response_model=ReportScheduleResponse,
    summary="Schedule a report",
    description="Schedule a report for automatic generation on a daily, weekly, or monthly basis.",
)
async def create_report_schedule(
    request: ReportScheduleRequest,
    db: AsyncSession = Depends(get_db),
) -> ReportScheduleResponse:
    """Register a report for recurring automatic generation."""
    schedule_id = schedule_report(
        report_type=request.report_type,
        schedule=request.schedule,
        tryout_ids=request.tryout_ids,
        website_id=request.website_id,
        recipients=request.recipients,
        export_format=request.export_format,
    )

    # Re-read to surface the computed next_run time, if available.
    scheduled = get_scheduled_report(schedule_id)

    return ReportScheduleResponse(
        schedule_id=schedule_id,
        message=f"Report scheduled successfully for {request.schedule} generation",
        next_run=scheduled.next_run if scheduled else None,
    )


@router.get(
    "/schedule/{schedule_id}",
    response_model=ReportScheduleOutput,
    summary="Get scheduled report details",
    description="Get details of a scheduled report.",
)
async def get_scheduled_report_details(
    schedule_id: str,
    website_id: int = Depends(get_website_id_from_header),
) -> ReportScheduleOutput:
    """Return one scheduled report's configuration and status.

    Raises:
        HTTPException: 404 when unknown, 403 when owned by another website.
    """
    scheduled = get_scheduled_report(schedule_id)

    if not scheduled:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Scheduled report {schedule_id} not found",
        )

    if scheduled.website_id != website_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this scheduled report",
        )

    return _to_schedule_output(scheduled)


@router.get(
    "/schedule",
    response_model=List[ReportScheduleOutput],
    summary="List scheduled reports",
    description="List all scheduled reports for a website.",
)
async def list_scheduled_reports_endpoint(
    website_id: int = Depends(get_website_id_from_header),
) -> List[ReportScheduleOutput]:
    """List every scheduled report owned by the current website."""
    return [
        _to_schedule_output(r)
        for r in list_scheduled_reports(website_id=website_id)
    ]


@router.delete(
    "/schedule/{schedule_id}",
    summary="Cancel scheduled report",
    description="Cancel a scheduled report.",
)
async def cancel_scheduled_report_endpoint(
    schedule_id: str,
    website_id: int = Depends(get_website_id_from_header),
) -> dict:
    """Remove a scheduled report from the system.

    Raises:
        HTTPException: 404 when unknown, 403 when owned by another website,
            500 when cancellation fails.
    """
    scheduled = get_scheduled_report(schedule_id)

    if not scheduled:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Scheduled report {schedule_id} not found",
        )

    if scheduled.website_id != website_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this scheduled report",
        )

    if not cancel_scheduled_report(schedule_id):
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to cancel scheduled report",
        )

    return {
        "message": f"Scheduled report {schedule_id} cancelled successfully",
        "schedule_id": schedule_id,
    }
# =============================================================================
# Report Export Endpoints
# =============================================================================

@router.get(
    "/export/{schedule_id}/{format}",
    summary="Export scheduled report",
    description="Generate and export a scheduled report in the specified format.",
)
async def export_scheduled_report(
    schedule_id: str,
    format: Literal["csv", "xlsx", "pdf"],
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
):
    """Generate a scheduled report on demand and stream it as a download.

    Raises:
        HTTPException: 404 when the schedule is unknown, 403 when owned by
            another website, 500 when generation or export fails.
    """
    scheduled = get_scheduled_report(schedule_id)

    if not scheduled:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Scheduled report {schedule_id} not found",
        )

    if scheduled.website_id != website_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to this scheduled report",
        )

    base_filename = f"report_{scheduled.report_type}_{schedule_id}"
    report = None

    try:
        kind = scheduled.report_type
        # Single-tryout report types use the first configured tryout id;
        # with no tryout ids configured, `report` stays None and we 500 below.
        if kind == "student_performance" and scheduled.tryout_ids:
            report = await generate_student_performance_report(
                tryout_id=scheduled.tryout_ids[0],
                website_id=website_id,
                db=db,
            )
        elif kind == "item_analysis" and scheduled.tryout_ids:
            report = await generate_item_analysis_report(
                tryout_id=scheduled.tryout_ids[0],
                website_id=website_id,
                db=db,
            )
        elif kind == "calibration_status" and scheduled.tryout_ids:
            report = await generate_calibration_status_report(
                tryout_id=scheduled.tryout_ids[0],
                website_id=website_id,
                db=db,
            )
        elif kind == "tryout_comparison":
            report = await generate_tryout_comparison_report(
                tryout_ids=scheduled.tryout_ids,
                website_id=website_id,
                db=db,
            )

        if not report:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to generate report",
            )

        exporters = {
            "csv": (export_report_to_csv, "text/csv"),
            "xlsx": (export_report_to_excel, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
            "pdf": (export_report_to_pdf, "application/pdf"),
        }
        export_fn, media_type = exporters[format]
        file_path = export_fn(report, base_filename)

        return FileResponse(
            path=file_path,
            media_type=media_type,
            filename=os.path.basename(file_path),
        )

    except HTTPException:
        # Re-raise our own errors untouched rather than wrapping them as 500s.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to export report: {str(e)}",
        )


# =============================================================================
# Direct Export Endpoints (without scheduling)
# =============================================================================

@router.get(
    "/student/performance/export/{format}",
    summary="Export student performance report directly",
    description="Generate and export student performance report directly without scheduling.",
)
async def export_student_performance_direct(
    format: Literal["csv", "xlsx", "pdf"],
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
    date_start: Optional[datetime] = None,
    date_end: Optional[datetime] = None,
):
    """Export student performance report directly."""
    # Build a date filter only from the bounds actually supplied.
    date_range = None
    if date_start or date_end:
        date_range = {
            key: value
            for key, value in (("start", date_start), ("end", date_end))
            if value
        }

    report = await generate_student_performance_report(
        tryout_id=tryout_id,
        website_id=website_id,
        db=db,
        date_range=date_range,
    )

    exporters = {
        "csv": (export_report_to_csv, "text/csv"),
        "xlsx": (export_report_to_excel, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
        "pdf": (export_report_to_pdf, "application/pdf"),
    }
    export_fn, media_type = exporters[format]
    file_path = export_fn(report, f"student_performance_{tryout_id}")

    return FileResponse(
        path=file_path,
        media_type=media_type,
        filename=os.path.basename(file_path),
    )
def _file_response_for(report, base_filename: str, format: str) -> FileResponse:
    """Serialize *report* to the requested format and wrap it in a FileResponse.

    Shared by the direct-export endpoints below so the format dispatch and
    media-type table live in one place instead of being copy-pasted per
    endpoint. ``format`` is one of "csv", "xlsx" or "pdf" (enforced by the
    endpoints' Literal annotations); anything else falls through to the PDF
    branch, matching the original per-endpoint if/elif chains.
    """
    if format == "csv":
        file_path = export_report_to_csv(report, base_filename)
        media_type = "text/csv"
    elif format == "xlsx":
        file_path = export_report_to_excel(report, base_filename)
        media_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    else:  # pdf
        file_path = export_report_to_pdf(report, base_filename)
        media_type = "application/pdf"

    return FileResponse(
        path=file_path,
        media_type=media_type,
        filename=os.path.basename(file_path),
    )


@router.get(
    "/items/analysis/export/{format}",
    summary="Export item analysis report directly",
    description="Generate and export item analysis report directly without scheduling.",
)
async def export_item_analysis_direct(
    format: Literal["csv", "xlsx", "pdf"],
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
    filter_by: Optional[Literal["difficulty", "calibrated", "discrimination"]] = None,
    difficulty_level: Optional[Literal["mudah", "sedang", "sulit"]] = None,
):
    """Export item analysis report directly.

    Args:
        format: Output file format (csv, xlsx, or pdf).
        tryout_id: Tryout whose items are analysed.
        filter_by: Optional filter dimension applied by the generator.
        difficulty_level: Optional difficulty bucket when filtering.

    Returns:
        FileResponse streaming the generated report file.
    """
    report = await generate_item_analysis_report(
        tryout_id=tryout_id,
        website_id=website_id,
        db=db,
        filter_by=filter_by,
        difficulty_level=difficulty_level,
    )
    return _file_response_for(report, f"item_analysis_{tryout_id}", format)


@router.get(
    "/calibration/status/export/{format}",
    summary="Export calibration status report directly",
    description="Generate and export calibration status report directly without scheduling.",
)
async def export_calibration_status_direct(
    format: Literal["csv", "xlsx", "pdf"],
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
):
    """Export calibration status report directly.

    Args:
        format: Output file format (csv, xlsx, or pdf).
        tryout_id: Tryout whose calibration status is reported.

    Returns:
        FileResponse streaming the generated report file.
    """
    report = await generate_calibration_status_report(
        tryout_id=tryout_id,
        website_id=website_id,
        db=db,
    )
    return _file_response_for(report, f"calibration_status_{tryout_id}", format)


@router.get(
    "/tryout/comparison/export/{format}",
    summary="Export tryout comparison report directly",
    description="Generate and export tryout comparison report directly without scheduling.",
)
async def export_tryout_comparison_direct(
    format: Literal["csv", "xlsx", "pdf"],
    tryout_ids: str,  # Comma-separated
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
    group_by: Literal["date", "subject"] = "date",
):
    """Export tryout comparison report directly.

    Args:
        format: Output file format (csv, xlsx, or pdf).
        tryout_ids: Comma-separated list of tryout IDs; blank entries
            (e.g. from a trailing comma) are discarded before validation.
        group_by: Comparison grouping dimension.

    Returns:
        FileResponse streaming the generated report file.

    Raises:
        HTTPException: 400 if fewer than two non-empty tryout IDs remain.
    """
    # Drop empty tokens so inputs like "a,,b" or "a,b," do not smuggle
    # phantom IDs past the minimum-count check below (the original kept
    # them, so "a," counted as two IDs).
    tryout_id_list = [tid.strip() for tid in tryout_ids.split(",") if tid.strip()]

    if len(tryout_id_list) < 2:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="At least 2 tryout IDs are required for comparison",
        )

    report = await generate_tryout_comparison_report(
        tryout_ids=tryout_id_list,
        website_id=website_id,
        db=db,
        group_by=group_by,
    )
    return _file_response_for(report, "tryout_comparison", format)
"""
Session API router for tryout session management.

Endpoints:
- POST /session/{session_id}/complete: Submit answers and complete session
- GET /session/{session_id}: Get session details
- POST /session: Create new session
"""

from datetime import datetime, timezone
from typing import Optional

from fastapi import APIRouter, Depends, HTTPException, Header, status
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload

from app.database import get_db
from app.models.item import Item
from app.models.session import Session
from app.models.tryout import Tryout
from app.models.tryout_stats import TryoutStats
from app.models.user_answer import UserAnswer
from app.schemas.session import (
    SessionCompleteRequest,
    SessionCompleteResponse,
    SessionCreateRequest,
    SessionResponse,
    UserAnswerOutput,
)
from app.services.ctt_scoring import (
    calculate_ctt_bobot,
    calculate_ctt_nm,
    calculate_ctt_nn,
    get_total_bobot_max,
    update_tryout_stats,
)

router = APIRouter(prefix="/session", tags=["sessions"])


def get_website_id_from_header(
    x_website_id: Optional[str] = Header(None, alias="X-Website-ID"),
) -> int:
    """
    Extract and validate website_id from the X-Website-ID request header.

    Args:
        x_website_id: Website ID from header

    Returns:
        Validated website ID as integer

    Raises:
        HTTPException: 400 if the header is missing or not an integer
    """
    if x_website_id is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="X-Website-ID header is required",
        )
    try:
        return int(x_website_id)
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="X-Website-ID must be a valid integer",
        )


async def _resolve_normalization_params(
    db: AsyncSession,
    website_id: int,
    tryout: Tryout,
    tryout_id: str,
):
    """Return the (rataan, sb) pair used to normalize NM into NN.

    "static" mode always uses the tryout's configured values. "dynamic" and
    "hybrid" modes behave identically here (the original code carried two
    byte-identical branches, now merged): use live TryoutStats once
    participant_count reaches min_sample_for_dynamic, otherwise fall back
    to the static values.
    """
    if tryout.normalization_mode == "static":
        return tryout.static_rataan, tryout.static_sb

    stats_result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    stats = stats_result.scalar_one_or_none()

    if stats and stats.participant_count >= tryout.min_sample_for_dynamic:
        # Guard against NULL stats columns with the static fallbacks.
        return (
            stats.rataan or tryout.static_rataan,
            stats.sb or tryout.static_sb,
        )
    # Not enough data yet: use static values.
    return tryout.static_rataan, tryout.static_sb


@router.post(
    "/{session_id}/complete",
    response_model=SessionCompleteResponse,
    summary="Complete session with answers",
    description="Submit user answers, calculate CTT scores, and complete the session.",
)
async def complete_session(
    session_id: str,
    request: SessionCompleteRequest,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
) -> SessionCompleteResponse:
    """
    Complete a session by submitting answers and calculating CTT scores.

    Process:
    1. Validate session exists and is not completed
    2. For each answer: check correctness, accumulate bobot_earned
    3. Save UserAnswer records
    4. Calculate CTT scores (total_benar, total_bobot_earned, NM, NN)
    5. Update Session with CTT results and TryoutStats incrementally

    Args:
        session_id: Unique session identifier
        request: Session completion request with end_time and user_answers
        db: Database session
        website_id: Website ID from header

    Returns:
        SessionCompleteResponse with CTT scores

    Raises:
        HTTPException: 404 if the session is unknown; 400 if it is already
            completed, an answer references an unknown item, or the same
            item is answered twice.
    """
    # Get session with its tryout eagerly loaded (config is needed below).
    result = await db.execute(
        select(Session)
        .options(selectinload(Session.tryout))
        .where(
            Session.session_id == session_id,
            Session.website_id == website_id,
        )
    )
    session = result.scalar_one_or_none()

    if session is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Session {session_id} not found",
        )

    if session.is_completed:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Session is already completed",
        )

    tryout = session.tryout

    # Map of item id -> Item for every item in this tryout (bobot lookup).
    items_result = await db.execute(
        select(Item).where(
            Item.website_id == website_id,
            Item.tryout_id == session.tryout_id,
        )
    )
    items = {item.id: item for item in items_result.scalars().all()}

    total_benar = 0
    total_bobot_earned = 0.0
    user_answer_records = []
    seen_item_ids = set()

    for answer_input in request.user_answers:
        # Reject duplicate submissions for the same item: previously a
        # repeated item_id was scored twice and inflated the totals.
        if answer_input.item_id in seen_item_ids:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Duplicate answer for item {answer_input.item_id}",
            )
        seen_item_ids.add(answer_input.item_id)

        item = items.get(answer_input.item_id)
        if item is None:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Item {answer_input.item_id} not found in tryout {session.tryout_id}",
            )

        # Case-insensitive match against the stored answer key.
        is_correct = answer_input.response.upper() == item.correct_answer.upper()

        # Weight (bobot) is only earned on a correct response.
        bobot_earned = 0.0
        if is_correct:
            total_benar += 1
            if item.ctt_bobot is not None:
                bobot_earned = item.ctt_bobot
            total_bobot_earned += bobot_earned

        user_answer = UserAnswer(
            session_id=session.session_id,
            wp_user_id=session.wp_user_id,
            website_id=website_id,
            tryout_id=session.tryout_id,
            item_id=item.id,
            response=answer_input.response.upper(),
            is_correct=is_correct,
            time_spent=answer_input.time_spent,
            scoring_mode_used=session.scoring_mode_used,
            bobot_earned=bobot_earned,
        )
        user_answer_records.append(user_answer)
        db.add(user_answer)

    # total_bobot_max drives NM; prefer the service computation, fall back
    # to summing the items we already loaded.
    try:
        total_bobot_max = await get_total_bobot_max(
            db, website_id, session.tryout_id, level="sedang"
        )
    except ValueError:
        total_bobot_max = sum(
            item.ctt_bobot or 0 for item in items.values() if item.level == "sedang"
        )
        if total_bobot_max == 0:
            # No bobot values at all: use the question count instead.
            total_bobot_max = len(items)

    # Calculate CTT NM (Nilai Mentah).
    nm = calculate_ctt_nm(total_bobot_earned, total_bobot_max)

    # Resolve normalization parameters per the tryout's configured mode.
    rataan, sb = await _resolve_normalization_params(
        db, website_id, tryout, session.tryout_id
    )

    # Calculate CTT NN (Nilai Nasional).
    nn = calculate_ctt_nn(nm, rataan, sb)

    # Persist results on the session.
    session.end_time = request.end_time
    session.is_completed = True
    session.total_benar = total_benar
    session.total_bobot_earned = total_bobot_earned
    session.NM = nm
    session.NN = nn
    session.rataan_used = rataan
    session.sb_used = sb

    # Update tryout stats incrementally with the new NM.
    await update_tryout_stats(db, website_id, session.tryout_id, nm)

    await db.commit()
    await db.refresh(session)

    return SessionCompleteResponse(
        id=session.id,
        session_id=session.session_id,
        wp_user_id=session.wp_user_id,
        website_id=session.website_id,
        tryout_id=session.tryout_id,
        start_time=session.start_time,
        end_time=session.end_time,
        is_completed=session.is_completed,
        scoring_mode_used=session.scoring_mode_used,
        total_benar=session.total_benar,
        total_bobot_earned=session.total_bobot_earned,
        NM=session.NM,
        NN=session.NN,
        rataan_used=session.rataan_used,
        sb_used=session.sb_used,
        user_answers=[
            UserAnswerOutput(
                id=ua.id,
                item_id=ua.item_id,
                response=ua.response,
                is_correct=ua.is_correct,
                time_spent=ua.time_spent,
                bobot_earned=ua.bobot_earned,
                scoring_mode_used=ua.scoring_mode_used,
            )
            for ua in user_answer_records
        ],
    )


@router.get(
    "/{session_id}",
    response_model=SessionResponse,
    summary="Get session details",
    description="Retrieve session details including scores if completed.",
)
async def get_session(
    session_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
) -> SessionResponse:
    """
    Get session details.

    Args:
        session_id: Unique session identifier
        db: Database session
        website_id: Website ID from header

    Returns:
        SessionResponse with session details

    Raises:
        HTTPException: 404 if the session does not exist for this website
    """
    result = await db.execute(
        select(Session).where(
            Session.session_id == session_id,
            Session.website_id == website_id,
        )
    )
    session = result.scalar_one_or_none()

    if session is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Session {session_id} not found",
        )

    return SessionResponse.model_validate(session)


@router.post(
    "/",
    response_model=SessionResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Create new session",
    description="Create a new tryout session for a student.",
)
async def create_session(
    request: SessionCreateRequest,
    db: AsyncSession = Depends(get_db),
) -> SessionResponse:
    """
    Create a new session.

    Args:
        request: Session creation request
        db: Database session

    Returns:
        SessionResponse with the created session

    Raises:
        HTTPException: 404 if the tryout is unknown, 409 if the session_id
            is already taken
    """
    # Verify tryout exists for this website.
    tryout_result = await db.execute(
        select(Tryout).where(
            Tryout.website_id == request.website_id,
            Tryout.tryout_id == request.tryout_id,
        )
    )
    tryout = tryout_result.scalar_one_or_none()

    if tryout is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tryout {request.tryout_id} not found for website {request.website_id}",
        )

    # Reject duplicate session IDs.
    # NOTE(review): this check-then-insert is racy under concurrent requests;
    # a unique constraint on session_id should back it up — confirm schema.
    existing_result = await db.execute(
        select(Session).where(Session.session_id == request.session_id)
    )
    existing_session = existing_result.scalar_one_or_none()

    if existing_session:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Session {request.session_id} already exists",
        )

    session = Session(
        session_id=request.session_id,
        wp_user_id=request.wp_user_id,
        website_id=request.website_id,
        tryout_id=request.tryout_id,
        scoring_mode_used=request.scoring_mode,
        start_time=datetime.now(timezone.utc),
        is_completed=False,
        total_benar=0,
        total_bobot_earned=0.0,
    )

    db.add(session)
    await db.commit()
    await db.refresh(session)

    return SessionResponse.model_validate(session)


# ---------------------------------------------------------------------------
# app/routers/tryouts.py
# ---------------------------------------------------------------------------

"""
Tryout API router for tryout configuration and management.

Endpoints:
- GET /tryout/{tryout_id}/config: Get tryout configuration
- PUT /tryout/{tryout_id}/normalization: Update normalization settings
- GET /tryout: List tryouts for a website
"""

from typing import List, Optional

from fastapi import APIRouter, Depends, HTTPException, Header, status
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload

from app.database import get_db
from app.models.item import Item
from app.models.tryout import Tryout
from app.models.tryout_stats import TryoutStats
from app.schemas.tryout import (
    NormalizationUpdateRequest,
    NormalizationUpdateResponse,
    TryoutConfigBrief,
    TryoutConfigResponse,
    TryoutStatsResponse,
)

router = APIRouter(prefix="/tryout", tags=["tryouts"])


def get_website_id_from_header(
    x_website_id: Optional[str] = Header(None, alias="X-Website-ID"),
) -> int:
    """
    Extract and validate website_id from the X-Website-ID request header.

    Args:
        x_website_id: Website ID from header

    Returns:
        Validated website ID as integer

    Raises:
        HTTPException: 400 if the header is missing or not an integer
    """
    if x_website_id is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="X-Website-ID header is required",
        )
    try:
        return int(x_website_id)
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="X-Website-ID must be a valid integer",
        )
@router.get(
    "/{tryout_id}/config",
    response_model=TryoutConfigResponse,
    summary="Get tryout configuration",
    description="Retrieve tryout configuration including scoring mode, normalization settings, and current stats.",
)
async def get_tryout_config(
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
) -> TryoutConfigResponse:
    """
    Get tryout configuration.

    Args:
        tryout_id: Tryout identifier
        db: Database session
        website_id: Website ID from header

    Returns:
        TryoutConfigResponse with scoring_mode, normalization_mode, and current_stats

    Raises:
        HTTPException: 404 if the tryout is unknown for this website
    """
    # Load the tryout with its stats relationship eagerly.
    result = await db.execute(
        select(Tryout)
        .options(selectinload(Tryout.stats))
        .where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    tryout = result.scalar_one_or_none()

    if tryout is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tryout {tryout_id} not found for website {website_id}",
        )

    # Stats are optional: a fresh tryout has no TryoutStats row yet.
    current_stats = None
    if tryout.stats:
        current_stats = TryoutStatsResponse(
            participant_count=tryout.stats.participant_count,
            rataan=tryout.stats.rataan,
            sb=tryout.stats.sb,
            min_nm=tryout.stats.min_nm,
            max_nm=tryout.stats.max_nm,
            last_calculated=tryout.stats.last_calculated,
        )

    return TryoutConfigResponse(
        id=tryout.id,
        website_id=tryout.website_id,
        tryout_id=tryout.tryout_id,
        name=tryout.name,
        description=tryout.description,
        scoring_mode=tryout.scoring_mode,
        selection_mode=tryout.selection_mode,
        normalization_mode=tryout.normalization_mode,
        min_sample_for_dynamic=tryout.min_sample_for_dynamic,
        static_rataan=tryout.static_rataan,
        static_sb=tryout.static_sb,
        ai_generation_enabled=tryout.ai_generation_enabled,
        hybrid_transition_slot=tryout.hybrid_transition_slot,
        min_calibration_sample=tryout.min_calibration_sample,
        theta_estimation_method=tryout.theta_estimation_method,
        fallback_to_ctt_on_error=tryout.fallback_to_ctt_on_error,
        current_stats=current_stats,
        created_at=tryout.created_at,
        updated_at=tryout.updated_at,
    )


@router.put(
    "/{tryout_id}/normalization",
    response_model=NormalizationUpdateResponse,
    summary="Update normalization settings",
    description="Update normalization mode and static values for a tryout.",
)
async def update_normalization(
    tryout_id: str,
    request: NormalizationUpdateRequest,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
) -> NormalizationUpdateResponse:
    """
    Update normalization settings for a tryout.

    Only fields present in the request are changed; None fields keep the
    tryout's existing values.

    Args:
        tryout_id: Tryout identifier
        request: Normalization update request
        db: Database session
        website_id: Website ID from header

    Returns:
        NormalizationUpdateResponse with updated settings

    Raises:
        HTTPException: 404 if the tryout is unknown for this website
    """
    result = await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    tryout = result.scalar_one_or_none()

    if tryout is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tryout {tryout_id} not found for website {website_id}",
        )

    # Partial update: apply only the provided fields.
    if request.normalization_mode is not None:
        tryout.normalization_mode = request.normalization_mode

    if request.static_rataan is not None:
        tryout.static_rataan = request.static_rataan

    if request.static_sb is not None:
        tryout.static_sb = request.static_sb

    # Current participant count for the response payload.
    stats_result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    stats = stats_result.scalar_one_or_none()
    current_participant_count = stats.participant_count if stats else 0

    await db.commit()
    await db.refresh(tryout)

    return NormalizationUpdateResponse(
        tryout_id=tryout.tryout_id,
        normalization_mode=tryout.normalization_mode,
        static_rataan=tryout.static_rataan,
        static_sb=tryout.static_sb,
        will_switch_to_dynamic_at=tryout.min_sample_for_dynamic,
        current_participant_count=current_participant_count,
    )


@router.get(
    "/",
    response_model=List[TryoutConfigBrief],
    summary="List tryouts",
    description="List all tryouts for a website.",
)
async def list_tryouts(
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
) -> List[TryoutConfigBrief]:
    """
    List all tryouts for a website.

    Args:
        db: Database session
        website_id: Website ID from header

    Returns:
        List of TryoutConfigBrief summaries (one per tryout)
    """
    result = await db.execute(
        select(Tryout)
        .options(selectinload(Tryout.stats))
        .where(Tryout.website_id == website_id)
    )
    tryouts = result.scalars().all()

    return [
        TryoutConfigBrief(
            tryout_id=t.tryout_id,
            name=t.name,
            scoring_mode=t.scoring_mode,
            selection_mode=t.selection_mode,
            normalization_mode=t.normalization_mode,
            participant_count=t.stats.participant_count if t.stats else 0,
        )
        for t in tryouts
    ]


@router.get(
    "/{tryout_id}/calibration-status",
    summary="Get calibration status",
    description="Get IRT calibration status for items in this tryout.",
)
async def get_calibration_status(
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
):
    """
    Get calibration status for items in a tryout.

    Returns statistics on how many items are calibrated and ready for IRT.

    Args:
        tryout_id: Tryout identifier
        db: Database session
        website_id: Website ID from header

    Returns:
        Calibration status summary dict

    Raises:
        HTTPException: 404 if the tryout is unknown for this website
    """
    # cast/Integer imported locally so this fix needs no module-level change.
    from sqlalchemy import Integer, cast

    tryout_result = await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    tryout = tryout_result.scalar_one_or_none()

    if tryout is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tryout {tryout_id} not found for website {website_id}",
        )

    # FIX: the original used func.cast(Item.calibrated, type_=func.INTEGER),
    # which renders a bare cast(col) SQL function call (func.cast is not the
    # CAST operator and func.INTEGER is not a type), producing invalid SQL.
    # sqlalchemy.cast(..., Integer) emits a proper CAST so booleans sum.
    stats_result = await db.execute(
        select(
            func.count().label("total_items"),
            func.sum(cast(Item.calibrated, Integer)).label("calibrated_items"),
            func.avg(Item.calibration_sample_size).label("avg_sample_size"),
        ).where(
            Item.website_id == website_id,
            Item.tryout_id == tryout_id,
        )
    )
    stats = stats_result.first()

    total_items = stats.total_items or 0
    calibrated_items = stats.calibrated_items or 0
    calibration_percentage = (calibrated_items / total_items * 100) if total_items > 0 else 0

    return {
        "tryout_id": tryout_id,
        "total_items": total_items,
        "calibrated_items": calibrated_items,
        "calibration_percentage": round(calibration_percentage, 2),
        "avg_sample_size": round(stats.avg_sample_size, 2) if stats.avg_sample_size else 0,
        "min_calibration_sample": tryout.min_calibration_sample,
        "ready_for_irt": calibration_percentage >= 90,
    }


@router.post(
    "/{tryout_id}/calibrate",
    summary="Trigger IRT calibration",
    description="Trigger IRT calibration for all items in this tryout with sufficient response data.",
)
async def trigger_calibration(
    tryout_id: str,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
):
    """
    Trigger IRT calibration for all items in a tryout.

    Runs calibration for items with >= min_calibration_sample responses.
    Updates item.irt_b, item.irt_se, and item.calibrated status.

    Args:
        tryout_id: Tryout identifier
        db: Database session
        website_id: Website ID from header

    Returns:
        Calibration results summary dict

    Raises:
        HTTPException: 404 if the tryout is unknown for this website
    """
    from app.services.irt_calibration import (
        calibrate_all,
        CALIBRATION_SAMPLE_THRESHOLD,
    )

    tryout_result = await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    tryout = tryout_result.scalar_one_or_none()

    if tryout is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tryout {tryout_id} not found for website {website_id}",
        )

    result = await calibrate_all(
        tryout_id=tryout_id,
        website_id=website_id,
        db=db,
        min_sample_size=tryout.min_calibration_sample or CALIBRATION_SAMPLE_THRESHOLD,
    )

    # NOTE(review): this multiplies calibration_percentage by 100 while
    # GET /calibration-status reports it already in percent — presumably
    # calibrate_all returns a 0..1 fraction; confirm against the service.
    return {
        "tryout_id": tryout_id,
        "total_items": result.total_items,
        "calibrated_items": result.calibrated_items,
        "failed_items": result.failed_items,
        "calibration_percentage": round(result.calibration_percentage * 100, 2),
        "ready_for_irt": result.ready_for_irt,
        "message": f"Calibration complete: {result.calibrated_items}/{result.total_items} items calibrated",
    }


@router.post(
    "/{tryout_id}/calibrate/{item_id}",
    summary="Trigger IRT calibration for single item",
    description="Trigger IRT calibration for a specific item.",
)
async def trigger_item_calibration(
    tryout_id: str,
    item_id: int,
    db: AsyncSession = Depends(get_db),
    website_id: int = Depends(get_website_id_from_header),
):
    """
    Trigger IRT calibration for a single item.

    Args:
        tryout_id: Tryout identifier
        item_id: Item ID to calibrate
        db: Database session
        website_id: Website ID from header

    Returns:
        Calibration result dict for the item

    Raises:
        HTTPException: 404 if the tryout or item is unknown
    """
    from app.services.irt_calibration import calibrate_item, CALIBRATION_SAMPLE_THRESHOLD

    tryout_result = await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    tryout = tryout_result.scalar_one_or_none()

    if tryout is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tryout {tryout_id} not found for website {website_id}",
        )

    # Ensure the item really belongs to this tryout (and website).
    item_result = await db.execute(
        select(Item).where(
            Item.id == item_id,
            Item.website_id == website_id,
            Item.tryout_id == tryout_id,
        )
    )
    item = item_result.scalar_one_or_none()

    if item is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Item {item_id} not found in tryout {tryout_id}",
        )

    result = await calibrate_item(
        item_id=item_id,
        db=db,
        min_sample_size=tryout.min_calibration_sample or CALIBRATION_SAMPLE_THRESHOLD,
    )

    return {
        "item_id": result.item_id,
        "status": result.status.value,
        "irt_b": result.irt_b,
        "irt_se": result.irt_se,
        "sample_size": result.sample_size,
        "message": result.message,
    }
+ +Endpoints: +- POST /wordpress/sync_users: Synchronize users from WordPress +- POST /wordpress/verify_session: Verify WordPress session/token +- GET /wordpress/website/{website_id}/users: Get all users for a website +""" + +import logging +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Header, status +from sqlalchemy import func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.database import get_db +from app.models.user import User +from app.models.website import Website +from app.schemas.wordpress import ( + SyncUsersResponse, + SyncStatsResponse, + UserListResponse, + VerifySessionRequest, + VerifySessionResponse, + WordPressUserResponse, +) +from app.services.wordpress_auth import ( + get_wordpress_user, + sync_wordpress_users, + verify_website_exists, + verify_wordpress_token, + get_or_create_user, + WordPressAPIError, + WordPressRateLimitError, + WordPressTokenInvalidError, + WebsiteNotFoundError, +) + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/wordpress", tags=["wordpress"]) + + +def get_website_id_from_header( + x_website_id: Optional[str] = Header(None, alias="X-Website-ID"), +) -> int: + """ + Extract and validate website_id from request header. + + Args: + x_website_id: Website ID from header + + Returns: + Validated website ID as integer + + Raises: + HTTPException: If header is missing or invalid + """ + if x_website_id is None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="X-Website-ID header is required", + ) + try: + return int(x_website_id) + except ValueError: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="X-Website-ID must be a valid integer", + ) + + +async def get_valid_website( + website_id: int, + db: AsyncSession, +) -> Website: + """ + Validate website_id exists and return Website model. 
+ + Args: + website_id: Website identifier + db: Database session + + Returns: + Website model instance + + Raises: + HTTPException: If website not found + """ + try: + return await verify_website_exists(website_id, db) + except WebsiteNotFoundError: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Website {website_id} not found", + ) + + +@router.post( + "/sync_users", + response_model=SyncUsersResponse, + summary="Synchronize users from WordPress", + description="Fetch all users from WordPress API and sync to local database. Requires admin WordPress token.", +) +async def sync_users_endpoint( + db: AsyncSession = Depends(get_db), + website_id: int = Depends(get_website_id_from_header), + authorization: Optional[str] = Header(None, alias="Authorization"), +) -> SyncUsersResponse: + """ + Synchronize users from WordPress to local database. + + Process: + 1. Validate website_id exists + 2. Extract admin token from Authorization header + 3. Fetch all users from WordPress API + 4. Upsert: Update existing users, insert new users + 5. Return sync statistics + + Args: + db: Database session + website_id: Website ID from header + authorization: Authorization header with Bearer token + + Returns: + SyncUsersResponse with sync statistics + + Raises: + HTTPException: If website not found, token invalid, or API error + """ + # Validate website exists + await get_valid_website(website_id, db) + + # Extract token from Authorization header + if authorization is None: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Authorization header is required", + ) + + # Parse Bearer token + parts = authorization.split() + if len(parts) != 2 or parts[0].lower() != "bearer": + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid Authorization header format. 
Use: Bearer {token}", + ) + + admin_token = parts[1] + + try: + sync_stats = await sync_wordpress_users( + website_id=website_id, + admin_token=admin_token, + db=db, + ) + + return SyncUsersResponse( + synced=SyncStatsResponse( + inserted=sync_stats.inserted, + updated=sync_stats.updated, + total=sync_stats.total, + errors=sync_stats.errors, + ), + website_id=website_id, + message=f"Sync completed: {sync_stats.inserted} inserted, {sync_stats.updated} updated", + ) + + except WordPressTokenInvalidError as e: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=str(e), + ) + except WordPressRateLimitError as e: + raise HTTPException( + status_code=status.HTTP_429_TOO_MANY_REQUESTS, + detail=str(e), + ) + except WordPressAPIError as e: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail=str(e), + ) + except WebsiteNotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) + + +@router.post( + "/verify_session", + response_model=VerifySessionResponse, + summary="Verify WordPress session", + description="Verify WordPress JWT token and user identity.", +) +async def verify_session_endpoint( + request: VerifySessionRequest, + db: AsyncSession = Depends(get_db), +) -> VerifySessionResponse: + """ + Verify WordPress session/token. + + Process: + 1. Validate website_id exists + 2. Call WordPress API to verify token + 3. Verify wp_user_id matches token owner + 4. Get or create local user + 5. 
Return validation result + + Args: + request: VerifySessionRequest with wp_user_id, token, website_id + db: Database session + + Returns: + VerifySessionResponse with validation result + + Raises: + HTTPException: If website not found or API error + """ + # Validate website exists + await get_valid_website(request.website_id, db) + + try: + # Verify token with WordPress + wp_user_info = await verify_wordpress_token( + token=request.token, + website_id=request.website_id, + wp_user_id=request.wp_user_id, + db=db, + ) + + if wp_user_info is None: + return VerifySessionResponse( + valid=False, + error="User ID mismatch or invalid credentials", + ) + + # Get or create local user + user = await get_or_create_user( + wp_user_id=request.wp_user_id, + website_id=request.website_id, + db=db, + ) + + return VerifySessionResponse( + valid=True, + user=WordPressUserResponse.model_validate(user), + wp_user_info={ + "username": wp_user_info.username, + "email": wp_user_info.email, + "display_name": wp_user_info.display_name, + "roles": wp_user_info.roles, + }, + ) + + except WordPressTokenInvalidError as e: + return VerifySessionResponse( + valid=False, + error=f"Invalid credentials: {str(e)}", + ) + except WordPressRateLimitError as e: + raise HTTPException( + status_code=status.HTTP_429_TOO_MANY_REQUESTS, + detail=str(e), + ) + except WordPressAPIError as e: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail=str(e), + ) + except WebsiteNotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) + + +@router.get( + "/website/{website_id}/users", + response_model=UserListResponse, + summary="Get users for website", + description="Retrieve all users for a specific website from local database with pagination.", +) +async def get_website_users( + website_id: int, + db: AsyncSession = Depends(get_db), + page: int = 1, + page_size: int = 50, +) -> UserListResponse: + """ + Get all users for a website. 
    Args:
        website_id: Website identifier
        db: Database session
        page: Page number (default: 1)
        page_size: Number of users per page (default: 50, max: 100)

    Returns:
        UserListResponse with paginated user list

    Raises:
        HTTPException: If website not found
    """
    # Validate website exists (raises HTTP 404 otherwise)
    await get_valid_website(website_id, db)

    # Clamp pagination inputs to sane bounds instead of rejecting them:
    # page_size is forced into [1, 100], page into [1, inf).
    page_size = min(max(1, page_size), 100)
    page = max(1, page)

    # Get total count of users belonging to this website
    count_result = await db.execute(
        select(func.count()).select_from(User).where(User.website_id == website_id)
    )
    total = count_result.scalar() or 0

    # Calculate pagination: ceiling division, but report at least one page
    # even when there are no rows so the response shape stays well-formed.
    offset = (page - 1) * page_size
    total_pages = (total + page_size - 1) // page_size if total > 0 else 1

    # Fetch the requested page, ordered by primary key for stable paging
    result = await db.execute(
        select(User)
        .where(User.website_id == website_id)
        .order_by(User.id)
        .offset(offset)
        .limit(page_size)
    )
    users = result.scalars().all()

    # Convert ORM rows into response schema objects
    return UserListResponse(
        users=[WordPressUserResponse.model_validate(user) for user in users],
        total=total,
        page=page,
        page_size=page_size,
        total_pages=total_pages,
    )


@router.get(
    "/website/{website_id}/user/{wp_user_id}",
    response_model=WordPressUserResponse,
    summary="Get specific user",
    description="Retrieve a specific user by WordPress user ID.",
)
async def get_user_endpoint(
    website_id: int,
    wp_user_id: str,
    db: AsyncSession = Depends(get_db),
) -> WordPressUserResponse:
    """
    Get a specific user by WordPress user ID.
+ + Args: + website_id: Website identifier + wp_user_id: WordPress user ID + db: Database session + + Returns: + WordPressUserResponse with user data + + Raises: + HTTPException: If website or user not found + """ + # Validate website exists + await get_valid_website(website_id, db) + + # Get user + user = await get_wordpress_user( + wp_user_id=wp_user_id, + website_id=website_id, + db=db, + ) + + if user is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"User {wp_user_id} not found for website {website_id}", + ) + + return WordPressUserResponse.model_validate(user) diff --git a/app/schemas/__init__.py b/app/schemas/__init__.py new file mode 100644 index 0000000..575b06e --- /dev/null +++ b/app/schemas/__init__.py @@ -0,0 +1,65 @@ +""" +Pydantic schemas package. +""" + +from app.schemas.ai import ( + AIGeneratePreviewRequest, + AIGeneratePreviewResponse, + AISaveRequest, + AISaveResponse, + AIStatsResponse, + GeneratedQuestion, +) +from app.schemas.session import ( + SessionCompleteRequest, + SessionCompleteResponse, + SessionCreateRequest, + SessionResponse, + UserAnswerInput, + UserAnswerOutput, +) +from app.schemas.tryout import ( + NormalizationUpdateRequest, + NormalizationUpdateResponse, + TryoutConfigBrief, + TryoutConfigResponse, + TryoutStatsResponse, +) +from app.schemas.wordpress import ( + SyncStatsResponse, + SyncUsersResponse, + UserListResponse, + VerifySessionRequest, + VerifySessionResponse, + WordPressUserResponse, +) + +__all__ = [ + # AI schemas + "AIGeneratePreviewRequest", + "AIGeneratePreviewResponse", + "AISaveRequest", + "AISaveResponse", + "AIStatsResponse", + "GeneratedQuestion", + # Session schemas + "UserAnswerInput", + "UserAnswerOutput", + "SessionCompleteRequest", + "SessionCompleteResponse", + "SessionCreateRequest", + "SessionResponse", + # Tryout schemas + "TryoutConfigResponse", + "TryoutStatsResponse", + "TryoutConfigBrief", + "NormalizationUpdateRequest", + "NormalizationUpdateResponse", + # 
WordPress schemas + "SyncStatsResponse", + "SyncUsersResponse", + "UserListResponse", + "VerifySessionRequest", + "VerifySessionResponse", + "WordPressUserResponse", +] diff --git a/app/schemas/ai.py b/app/schemas/ai.py new file mode 100644 index 0000000..199cfb7 --- /dev/null +++ b/app/schemas/ai.py @@ -0,0 +1,102 @@ +""" +Pydantic schemas for AI generation endpoints. + +Request/response models for admin AI generation playground. +""" + +from typing import Dict, Literal, Optional + +from pydantic import BaseModel, Field, field_validator + + +class AIGeneratePreviewRequest(BaseModel): + basis_item_id: int = Field( + ..., description="ID of the basis item (must be sedang level)" + ) + target_level: Literal["mudah", "sulit"] = Field( + ..., description="Target difficulty level for generated question" + ) + ai_model: str = Field( + default="qwen/qwen-2.5-coder-32b-instruct", + description="AI model to use for generation", + ) + + +class AIGeneratePreviewResponse(BaseModel): + success: bool = Field(..., description="Whether generation was successful") + stem: Optional[str] = None + options: Optional[Dict[str, str]] = None + correct: Optional[str] = None + explanation: Optional[str] = None + ai_model: Optional[str] = None + basis_item_id: Optional[int] = None + target_level: Optional[str] = None + error: Optional[str] = None + cached: bool = False + + +class AISaveRequest(BaseModel): + stem: str = Field(..., description="Question stem") + options: Dict[str, str] = Field( + ..., description="Answer options (A, B, C, D)" + ) + correct: str = Field(..., description="Correct answer (A/B/C/D)") + explanation: Optional[str] = None + tryout_id: str = Field(..., description="Tryout identifier") + website_id: int = Field(..., description="Website identifier") + basis_item_id: int = Field(..., description="Basis item ID") + slot: int = Field(..., description="Question slot position") + level: Literal["mudah", "sedang", "sulit"] = Field( + ..., description="Difficulty level" + ) + 
ai_model: str = Field( + default="qwen/qwen-2.5-coder-32b-instruct", + description="AI model used for generation", + ) + + @field_validator("correct") + @classmethod + def validate_correct(cls, v: str) -> str: + if v.upper() not in ["A", "B", "C", "D"]: + raise ValueError("Correct answer must be A, B, C, or D") + return v.upper() + + @field_validator("options") + @classmethod + def validate_options(cls, v: Dict[str, str]) -> Dict[str, str]: + required_keys = {"A", "B", "C", "D"} + if not required_keys.issubset(set(v.keys())): + raise ValueError("Options must contain keys A, B, C, D") + return v + + +class AISaveResponse(BaseModel): + success: bool = Field(..., description="Whether save was successful") + item_id: Optional[int] = None + error: Optional[str] = None + + +class AIStatsResponse(BaseModel): + total_ai_items: int = Field(..., description="Total AI-generated items") + items_by_model: Dict[str, int] = Field( + default_factory=dict, description="Items count by AI model" + ) + cache_hit_rate: float = Field( + default=0.0, description="Cache hit rate (0.0 to 1.0)" + ) + total_cache_hits: int = Field(default=0, description="Total cache hits") + total_requests: int = Field(default=0, description="Total generation requests") + + +class GeneratedQuestion(BaseModel): + stem: str + options: Dict[str, str] + correct: str + explanation: Optional[str] = None + + @field_validator("correct") + @classmethod + def validate_correct(cls, v: str) -> str: + if v.upper() not in ["A", "B", "C", "D"]: + raise ValueError("Correct answer must be A, B, C, or D") + return v.upper() diff --git a/app/schemas/report.py b/app/schemas/report.py new file mode 100644 index 0000000..127c340 --- /dev/null +++ b/app/schemas/report.py @@ -0,0 +1,264 @@ +""" +Pydantic schemas for Report API endpoints. 
+""" + +from datetime import datetime +from typing import Any, Dict, List, Literal, Optional + +from pydantic import BaseModel, Field + + +# ============================================================================= +# Student Performance Report Schemas +# ============================================================================= + +class StudentPerformanceRecordOutput(BaseModel): + """Individual student performance record output.""" + + session_id: str + wp_user_id: str + tryout_id: str + NM: Optional[int] = None + NN: Optional[int] = None + theta: Optional[float] = None + theta_se: Optional[float] = None + total_benar: int + time_spent: int # Total time in seconds + start_time: Optional[datetime] = None + end_time: Optional[datetime] = None + scoring_mode_used: str + rataan_used: Optional[float] = None + sb_used: Optional[float] = None + + +class AggregatePerformanceStatsOutput(BaseModel): + """Aggregate statistics for student performance output.""" + + tryout_id: str + participant_count: int + avg_nm: Optional[float] = None + std_nm: Optional[float] = None + min_nm: Optional[int] = None + max_nm: Optional[int] = None + median_nm: Optional[float] = None + avg_nn: Optional[float] = None + std_nn: Optional[float] = None + avg_theta: Optional[float] = None + pass_rate: float # Percentage with NN >= 500 + avg_time_spent: float # Average time in seconds + + +class StudentPerformanceReportOutput(BaseModel): + """Complete student performance report output.""" + + generated_at: datetime + tryout_id: str + website_id: int + date_range: Optional[Dict[str, str]] = None + aggregate: AggregatePerformanceStatsOutput + individual_records: List[StudentPerformanceRecordOutput] = [] + + +class StudentPerformanceReportRequest(BaseModel): + """Request schema for student performance report.""" + + tryout_id: str = Field(..., description="Tryout identifier") + website_id: int = Field(..., description="Website identifier") + date_start: Optional[datetime] = Field(None, 
description="Filter by start date") + date_end: Optional[datetime] = Field(None, description="Filter by end date") + format_type: Literal["individual", "aggregate", "both"] = Field( + default="both", description="Report format" + ) + + +# ============================================================================= +# Item Analysis Report Schemas +# ============================================================================= + +class ItemAnalysisRecordOutput(BaseModel): + """Item analysis record output for a single item.""" + + item_id: int + slot: int + level: str + ctt_p: Optional[float] = None + ctt_bobot: Optional[float] = None + ctt_category: Optional[str] = None + irt_b: Optional[float] = None + irt_se: Optional[float] = None + calibrated: bool + calibration_sample_size: int + correctness_rate: float + item_total_correlation: Optional[float] = None + information_values: Dict[float, float] = Field(default_factory=dict) + optimal_theta_range: str = "N/A" + + +class ItemAnalysisReportOutput(BaseModel): + """Complete item analysis report output.""" + + generated_at: datetime + tryout_id: str + website_id: int + total_items: int + items: List[ItemAnalysisRecordOutput] + summary: Dict[str, Any] + + +class ItemAnalysisReportRequest(BaseModel): + """Request schema for item analysis report.""" + + tryout_id: str = Field(..., description="Tryout identifier") + website_id: int = Field(..., description="Website identifier") + filter_by: Optional[Literal["difficulty", "calibrated", "discrimination"]] = Field( + None, description="Filter items by category" + ) + difficulty_level: Optional[Literal["mudah", "sedang", "sulit"]] = Field( + None, description="Filter by difficulty level (only when filter_by='difficulty')" + ) + + +# ============================================================================= +# Calibration Status Report Schemas +# ============================================================================= + +class CalibrationItemStatusOutput(BaseModel): + 
"""Calibration status for a single item output.""" + + item_id: int + slot: int + level: str + sample_size: int + calibrated: bool + irt_b: Optional[float] = None + irt_se: Optional[float] = None + ctt_p: Optional[float] = None + + +class CalibrationStatusReportOutput(BaseModel): + """Complete calibration status report output.""" + + generated_at: datetime + tryout_id: str + website_id: int + total_items: int + calibrated_items: int + calibration_percentage: float + items_awaiting_calibration: List[CalibrationItemStatusOutput] + avg_calibration_sample_size: float + estimated_time_to_90_percent: Optional[str] = None + ready_for_irt_rollout: bool + items: List[CalibrationItemStatusOutput] + + +class CalibrationStatusReportRequest(BaseModel): + """Request schema for calibration status report.""" + + tryout_id: str = Field(..., description="Tryout identifier") + website_id: int = Field(..., description="Website identifier") + + +# ============================================================================= +# Tryout Comparison Report Schemas +# ============================================================================= + +class TryoutComparisonRecordOutput(BaseModel): + """Tryout comparison data point output.""" + + tryout_id: str + date: Optional[str] = None + subject: Optional[str] = None + participant_count: int + avg_nm: Optional[float] = None + avg_nn: Optional[float] = None + avg_theta: Optional[float] = None + std_nm: Optional[float] = None + calibration_percentage: float + + +class TryoutComparisonReportOutput(BaseModel): + """Complete tryout comparison report output.""" + + generated_at: datetime + comparison_type: Literal["date", "subject"] + tryouts: List[TryoutComparisonRecordOutput] + trends: Optional[Dict[str, Any]] = None + normalization_impact: Optional[Dict[str, Any]] = None + + +class TryoutComparisonReportRequest(BaseModel): + """Request schema for tryout comparison report.""" + + tryout_ids: List[str] = Field(..., min_length=2, description="List 
of tryout IDs to compare") + website_id: int = Field(..., description="Website identifier") + group_by: Literal["date", "subject"] = Field( + default="date", description="Group comparison by date or subject" + ) + + +# ============================================================================= +# Report Scheduling Schemas +# ============================================================================= + +class ReportScheduleRequest(BaseModel): + """Request schema for scheduling a report.""" + + report_type: Literal["student_performance", "item_analysis", "calibration_status", "tryout_comparison"] = Field( + ..., description="Type of report to generate" + ) + schedule: Literal["daily", "weekly", "monthly"] = Field( + ..., description="Schedule frequency" + ) + tryout_ids: List[str] = Field(..., description="List of tryout IDs for the report") + website_id: int = Field(..., description="Website identifier") + recipients: List[str] = Field(..., description="List of email addresses to send report to") + export_format: Literal["csv", "xlsx", "pdf"] = Field( + default="xlsx", description="Export format for the report" + ) + + +class ReportScheduleOutput(BaseModel): + """Output schema for scheduled report.""" + + schedule_id: str + report_type: str + schedule: str + tryout_ids: List[str] + website_id: int + recipients: List[str] + format: str + created_at: datetime + last_run: Optional[datetime] = None + next_run: Optional[datetime] = None + is_active: bool + + +class ReportScheduleResponse(BaseModel): + """Response schema for schedule creation.""" + + schedule_id: str + message: str + next_run: Optional[datetime] = None + + +# ============================================================================= +# Export Schemas +# ============================================================================= + +class ExportRequest(BaseModel): + """Request schema for exporting a report.""" + + schedule_id: str = Field(..., description="Schedule ID to generate report for") + 
    export_format: Literal["csv", "xlsx", "pdf"] = Field(
        default="xlsx", description="Export format"
    )


class ExportResponse(BaseModel):
    """Response schema for export request."""

    # Server-side location of the generated file plus metadata about it.
    file_path: str
    file_name: str
    format: str
    generated_at: datetime
    download_url: Optional[str] = None
diff --git a/app/schemas/session.py b/app/schemas/session.py
new file mode 100644
index 0000000..e6abab7
--- /dev/null
+++ b/app/schemas/session.py
@@ -0,0 +1,108 @@
"""
Pydantic schemas for Session API endpoints.
"""

from datetime import datetime
from typing import List, Literal, Optional

from pydantic import BaseModel, Field


class UserAnswerInput(BaseModel):
    """Input schema for a single user answer."""

    item_id: int = Field(..., description="Item/question ID")
    response: str = Field(..., min_length=1, max_length=10, description="User's answer (A, B, C, D)")
    time_spent: int = Field(default=0, ge=0, description="Time spent on this question (seconds)")


class SessionCompleteRequest(BaseModel):
    """Request schema for completing a session."""

    end_time: datetime = Field(..., description="Session end timestamp")
    user_answers: List[UserAnswerInput] = Field(..., description="List of user answers")


class UserAnswerOutput(BaseModel):
    """Output schema for a single user answer."""

    id: int
    item_id: int
    response: str
    is_correct: bool
    time_spent: int
    bobot_earned: float
    scoring_mode_used: str

    # Allow validating directly from ORM objects (SQLAlchemy rows).
    model_config = {"from_attributes": True}


class SessionCompleteResponse(BaseModel):
    """Response schema for completed session with CTT scores."""

    id: int
    session_id: str
    wp_user_id: str
    website_id: int
    tryout_id: str
    start_time: datetime
    end_time: Optional[datetime]
    is_completed: bool
    scoring_mode_used: str

    # CTT scores
    total_benar: int = Field(description="Total correct answers")
    total_bobot_earned: float = Field(description="Total weight earned")
    NM: Optional[int] = Field(description="Nilai Mentah (raw score) [0, 1000]")
    NN: Optional[int] = Field(description="Nilai Nasional (normalized score) [0, 1000]")

    # Normalization metadata (which mean/SD were applied when computing NN)
    rataan_used: Optional[float] = Field(description="Mean value used for normalization")
    sb_used: Optional[float] = Field(description="Standard deviation used for normalization")

    # User answers
    user_answers: List[UserAnswerOutput]

    model_config = {"from_attributes": True}


class SessionCreateRequest(BaseModel):
    """Request schema for creating a new session."""

    session_id: str = Field(..., description="Unique session identifier")
    wp_user_id: str = Field(..., description="WordPress user ID")
    website_id: int = Field(..., description="Website identifier")
    tryout_id: str = Field(..., description="Tryout identifier")
    scoring_mode: Literal["ctt", "irt", "hybrid"] = Field(
        default="ctt", description="Scoring mode for this session"
    )


class SessionResponse(BaseModel):
    """Response schema for session data."""

    id: int
    session_id: str
    wp_user_id: str
    website_id: int
    tryout_id: str
    start_time: datetime
    end_time: Optional[datetime]
    is_completed: bool
    scoring_mode_used: str

    # CTT scores (populated after completion)
    total_benar: int
    total_bobot_earned: float
    NM: Optional[int]
    NN: Optional[int]

    # IRT scores (populated after completion)
    theta: Optional[float]
    theta_se: Optional[float]

    # Normalization metadata
    rataan_used: Optional[float]
    sb_used: Optional[float]

    model_config = {"from_attributes": True}
diff --git a/app/schemas/tryout.py b/app/schemas/tryout.py
new file mode 100644
index 0000000..3dbc147
--- /dev/null
+++ b/app/schemas/tryout.py
@@ -0,0 +1,97 @@
"""
Pydantic schemas for Tryout API endpoints.
"""

from datetime import datetime
from typing import List, Literal, Optional

from pydantic import BaseModel, Field


class TryoutConfigResponse(BaseModel):
    """Response schema for tryout configuration."""

    id: int
    website_id: int
    tryout_id: str
    name: str
    description: Optional[str]

    # Scoring configuration
    scoring_mode: Literal["ctt", "irt", "hybrid"]
    selection_mode: Literal["fixed", "adaptive", "hybrid"]
    normalization_mode: Literal["static", "dynamic", "hybrid"]

    # Normalization settings
    min_sample_for_dynamic: int
    static_rataan: float
    static_sb: float

    # AI generation
    ai_generation_enabled: bool

    # Hybrid mode settings
    hybrid_transition_slot: Optional[int]

    # IRT settings
    min_calibration_sample: int
    theta_estimation_method: Literal["mle", "map", "eap"]
    fallback_to_ctt_on_error: bool

    # Current stats — forward reference as a string; resolved by the
    # TryoutConfigResponse.model_rebuild() call at module bottom.
    current_stats: Optional["TryoutStatsResponse"]

    # Timestamps
    created_at: datetime
    updated_at: datetime

    # Allow validating directly from ORM objects (SQLAlchemy rows).
    model_config = {"from_attributes": True}


class TryoutStatsResponse(BaseModel):
    """Response schema for tryout statistics."""

    participant_count: int
    rataan: Optional[float]
    sb: Optional[float]
    min_nm: Optional[int]
    max_nm: Optional[int]
    last_calculated: Optional[datetime]

    model_config = {"from_attributes": True}


class TryoutConfigBrief(BaseModel):
    """Brief tryout config for list responses."""

    tryout_id: str
    name: str
    scoring_mode: str
    selection_mode: str
    normalization_mode: str
    participant_count: Optional[int] = None

    model_config = {"from_attributes": True}


class NormalizationUpdateRequest(BaseModel):
    """Request schema for updating normalization settings."""

    # All fields optional: only the supplied settings are updated.
    # static_sb must be strictly positive (it is used as a divisor).
    normalization_mode: Optional[Literal["static", "dynamic", "hybrid"]] = None
    static_rataan: Optional[float] = Field(None, ge=0)
    static_sb: Optional[float] = Field(None, gt=0)


class NormalizationUpdateResponse(BaseModel):
    """Response schema for normalization update."""
    tryout_id: str
    normalization_mode: str
    static_rataan: float
    static_sb: float
    will_switch_to_dynamic_at: int
    current_participant_count: int


# Update forward reference (TryoutConfigResponse.current_stats refers to
# TryoutStatsResponse, which is defined after it).
TryoutConfigResponse.model_rebuild()
diff --git a/app/schemas/wordpress.py b/app/schemas/wordpress.py
new file mode 100644
index 0000000..eb6f2c1
--- /dev/null
+++ b/app/schemas/wordpress.py
@@ -0,0 +1,86 @@
"""
Pydantic schemas for WordPress Integration API endpoints.
"""

from datetime import datetime
from typing import Any, List, Optional

from pydantic import BaseModel, Field


class VerifySessionRequest(BaseModel):
    """Request schema for verifying WordPress session."""

    wp_user_id: str = Field(..., description="WordPress user ID")
    token: str = Field(..., description="WordPress JWT authentication token")
    website_id: int = Field(..., description="Website identifier")


class WordPressUserResponse(BaseModel):
    """Response schema for WordPress user data."""

    id: int = Field(..., description="Local database user ID")
    wp_user_id: str = Field(..., description="WordPress user ID")
    website_id: int = Field(..., description="Website identifier")
    created_at: datetime = Field(..., description="User creation timestamp")
    updated_at: datetime = Field(..., description="User last update timestamp")

    # Allow validating directly from ORM objects (SQLAlchemy rows).
    model_config = {"from_attributes": True}


class VerifySessionResponse(BaseModel):
    """Response schema for session verification."""

    valid: bool = Field(..., description="Whether the session is valid")
    user: Optional[WordPressUserResponse] = Field(
        default=None, description="User data if session is valid"
    )
    error: Optional[str] = Field(
        default=None, description="Error message if session is invalid"
    )
    wp_user_info: Optional[dict[str, Any]] = Field(
        default=None, description="WordPress user info from API"
    )


class SyncUsersRequest(BaseModel):
    """Request schema for user synchronization (optional body)."""

    # Intentionally empty: the sync endpoint takes no request-body fields yet.
    pass


class SyncStatsResponse(BaseModel):
    """Response schema for user synchronization statistics."""

    inserted: int = Field(..., description="Number of users inserted")
    updated: int = Field(..., description="Number of users updated")
    total: int = Field(..., description="Total users processed")
    errors: int = Field(default=0, description="Number of errors during sync")


class SyncUsersResponse(BaseModel):
    """Response schema for user synchronization."""

    synced: SyncStatsResponse = Field(..., description="Synchronization statistics")
    website_id: int = Field(..., description="Website identifier")
    message: str = Field(default="Sync completed", description="Status message")


class UserListResponse(BaseModel):
    """Response schema for paginated user list."""

    users: List[WordPressUserResponse] = Field(..., description="List of users")
    total: int = Field(..., description="Total number of users")
    page: int = Field(default=1, description="Current page number")
    page_size: int = Field(default=50, description="Number of users per page")
    total_pages: int = Field(default=1, description="Total number of pages")


class WordPressErrorDetail(BaseModel):
    """Detail schema for WordPress errors."""

    code: str = Field(..., description="Error code")
    message: str = Field(..., description="Error message")
    details: Optional[dict[str, Any]] = Field(
        default=None, description="Additional error details"
    )
diff --git a/app/services/__init__.py b/app/services/__init__.py
new file mode 100644
index 0000000..aeb3ace
--- /dev/null
+++ b/app/services/__init__.py
@@ -0,0 +1,155 @@
"""
Services module for IRT Bank Soal.
+ +Contains business logic services for: +- IRT calibration +- CAT selection +- WordPress authentication +- AI question generation +- Reporting +""" + +from app.services.irt_calibration import ( + IRTCalibrationError, + calculate_fisher_information, + calculate_item_information, + calculate_probability, + calculate_theta_se, + estimate_b_from_ctt_p, + estimate_theta_mle, + get_session_responses, + nn_to_theta, + theta_to_nn, + update_session_theta, + update_theta_after_response, +) +from app.services.cat_selection import ( + CATSelectionError, + NextItemResult, + TerminationCheck, + check_user_level_reuse, + get_available_levels_for_slot, + get_next_item, + get_next_item_adaptive, + get_next_item_fixed, + get_next_item_hybrid, + should_terminate, + simulate_cat_selection, + update_theta, +) +from app.services.wordpress_auth import ( + WordPressAPIError, + WordPressAuthError, + WordPressRateLimitError, + WordPressTokenInvalidError, + WordPressUserInfo, + WebsiteNotFoundError, + SyncStats, + fetch_wordpress_users, + get_or_create_user, + get_wordpress_user, + sync_wordpress_users, + verify_website_exists, + verify_wordpress_token, +) +from app.services.ai_generation import ( + call_openrouter_api, + check_cache_reuse, + generate_question, + generate_with_cache_check, + get_ai_stats, + get_prompt_template, + parse_ai_response, + save_ai_question, + validate_ai_model, + SUPPORTED_MODELS, +) +from app.services.reporting import ( + generate_student_performance_report, + generate_item_analysis_report, + generate_calibration_status_report, + generate_tryout_comparison_report, + export_report_to_csv, + export_report_to_excel, + export_report_to_pdf, + schedule_report, + get_scheduled_report, + list_scheduled_reports, + cancel_scheduled_report, + StudentPerformanceReport, + ItemAnalysisReport, + CalibrationStatusReport, + TryoutComparisonReport, + ReportSchedule, +) + +__all__ = [ + # IRT Calibration + "IRTCalibrationError", + "calculate_fisher_information", + 
"calculate_item_information", + "calculate_probability", + "calculate_theta_se", + "estimate_b_from_ctt_p", + "estimate_theta_mle", + "get_session_responses", + "nn_to_theta", + "theta_to_nn", + "update_session_theta", + "update_theta_after_response", + # CAT Selection + "CATSelectionError", + "NextItemResult", + "TerminationCheck", + "check_user_level_reuse", + "get_available_levels_for_slot", + "get_next_item", + "get_next_item_adaptive", + "get_next_item_fixed", + "get_next_item_hybrid", + "should_terminate", + "simulate_cat_selection", + "update_theta", + # WordPress Auth + "WordPressAPIError", + "WordPressAuthError", + "WordPressRateLimitError", + "WordPressTokenInvalidError", + "WordPressUserInfo", + "WebsiteNotFoundError", + "SyncStats", + "fetch_wordpress_users", + "get_or_create_user", + "get_wordpress_user", + "sync_wordpress_users", + "verify_website_exists", + "verify_wordpress_token", + # AI Generation + "call_openrouter_api", + "check_cache_reuse", + "generate_question", + "generate_with_cache_check", + "get_ai_stats", + "get_prompt_template", + "parse_ai_response", + "save_ai_question", + "validate_ai_model", + "SUPPORTED_MODELS", + # Reporting + "generate_student_performance_report", + "generate_item_analysis_report", + "generate_calibration_status_report", + "generate_tryout_comparison_report", + "export_report_to_csv", + "export_report_to_excel", + "export_report_to_pdf", + "schedule_report", + "get_scheduled_report", + "list_scheduled_reports", + "cancel_scheduled_report", + "StudentPerformanceReport", + "ItemAnalysisReport", + "CalibrationStatusReport", + "TryoutComparisonReport", + "ReportSchedule", +] diff --git a/app/services/ai_generation.py b/app/services/ai_generation.py new file mode 100644 index 0000000..609db7b --- /dev/null +++ b/app/services/ai_generation.py @@ -0,0 +1,595 @@ +""" +AI Question Generation Service. + +Handles OpenRouter API integration for generating question variants. 
+Implements caching, user-level reuse checking, and prompt engineering. +""" + +import json +import logging +import re +from typing import Any, Dict, Literal, Optional, Union + +import httpx +from sqlalchemy import and_, func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.config import get_settings +from app.models.item import Item +from app.models.tryout import Tryout +from app.models.user_answer import UserAnswer +from app.schemas.ai import GeneratedQuestion + +logger = logging.getLogger(__name__) +settings = get_settings() + +# OpenRouter API configuration +OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions" + +# Supported AI models +SUPPORTED_MODELS = { + "qwen/qwen-2.5-coder-32b-instruct": "Qwen 2.5 Coder 32B", + "meta-llama/llama-3.3-70b-instruct": "Llama 3.3 70B", +} + +# Level mapping for prompts +LEVEL_DESCRIPTIONS = { + "mudah": "easier (simpler concepts, more straightforward calculations)", + "sedang": "medium difficulty", + "sulit": "harder (more complex concepts, multi-step reasoning)", +} + + +def get_prompt_template( + basis_stem: str, + basis_options: Dict[str, str], + basis_correct: str, + basis_explanation: Optional[str], + target_level: Literal["mudah", "sulit"], +) -> str: + """ + Generate standardized prompt for AI question generation. + + Args: + basis_stem: The basis question stem + basis_options: The basis question options + basis_correct: The basis correct answer + basis_explanation: The basis explanation + target_level: Target difficulty level + + Returns: + Formatted prompt string + """ + level_desc = LEVEL_DESCRIPTIONS.get(target_level, target_level) + + options_text = "\n".join( + [f" {key}: {value}" for key, value in basis_options.items()] + ) + + explanation_text = ( + f"Explanation: {basis_explanation}" + if basis_explanation + else "Explanation: (not provided)" + ) + + prompt = f"""You are an educational content creator specializing in creating assessment questions. 
+ +Given a "Sedang" (medium difficulty) question, generate a new question at a different difficulty level. + +BASIS QUESTION (Sedang level): +Question: {basis_stem} +Options: +{options_text} +Correct Answer: {basis_correct} +{explanation_text} + +TASK: +Generate 1 new question that is {level_desc} than the basis question above. + +REQUIREMENTS: +1. Keep the SAME topic/subject matter as the basis question +2. Use similar context and terminology +3. Create exactly 4 answer options (A, B, C, D) +4. Only ONE correct answer +5. Include a clear explanation of why the correct answer is correct +6. Make the question noticeably {level_desc} - not just a minor variation + +OUTPUT FORMAT: +Return ONLY a valid JSON object with this exact structure (no markdown, no code blocks): +{{"stem": "Your question text here", "options": {{"A": "Option A text", "B": "Option B text", "C": "Option C text", "D": "Option D text"}}, "correct": "A", "explanation": "Explanation text here"}} + +Remember: The correct field must be exactly "A", "B", "C", or "D".""" + + return prompt + + +def parse_ai_response(response_text: str) -> Optional[GeneratedQuestion]: + """ + Parse AI response to extract question data. + + Handles various response formats including JSON code blocks. + + Args: + response_text: Raw AI response text + + Returns: + GeneratedQuestion if parsing successful, None otherwise + """ + if not response_text: + return None + + # Clean the response text + cleaned = response_text.strip() + + # Try to extract JSON from code blocks if present + json_patterns = [ + r"```json\s*([\s\S]*?)\s*```", # ```json ... ``` + r"```\s*([\s\S]*?)\s*```", # ``` ... 
``` + r"(\{[\s\S]*\})", # Raw JSON object + ] + + for pattern in json_patterns: + match = re.search(pattern, cleaned) + if match: + json_str = match.group(1).strip() + try: + data = json.loads(json_str) + return validate_and_create_question(data) + except json.JSONDecodeError: + continue + + # Try parsing the entire response as JSON + try: + data = json.loads(cleaned) + return validate_and_create_question(data) + except json.JSONDecodeError: + pass + + logger.warning(f"Failed to parse AI response: {cleaned[:200]}...") + return None + + +def validate_and_create_question(data: Dict[str, Any]) -> Optional[GeneratedQuestion]: + """ + Validate parsed data and create GeneratedQuestion. + + Args: + data: Parsed JSON data + + Returns: + GeneratedQuestion if valid, None otherwise + """ + required_fields = ["stem", "options", "correct"] + if not all(field in data for field in required_fields): + logger.warning(f"Missing required fields in AI response: {data.keys()}") + return None + + # Validate options + options = data.get("options", {}) + if not isinstance(options, dict): + logger.warning("Options is not a dictionary") + return None + + required_options = {"A", "B", "C", "D"} + if not required_options.issubset(set(options.keys())): + logger.warning(f"Missing required options: {required_options - set(options.keys())}") + return None + + # Validate correct answer + correct = str(data.get("correct", "")).upper() + if correct not in required_options: + logger.warning(f"Invalid correct answer: {correct}") + return None + + return GeneratedQuestion( + stem=str(data["stem"]).strip(), + options={k: str(v).strip() for k, v in options.items()}, + correct=correct, + explanation=str(data.get("explanation", "")).strip() or None, + ) + + +async def call_openrouter_api( + prompt: str, + model: str, + max_retries: int = 3, +) -> Optional[str]: + """ + Call OpenRouter API to generate question. 
+ + Args: + prompt: The prompt to send + model: AI model to use + max_retries: Maximum retry attempts + + Returns: + API response text or None if failed + """ + if not settings.OPENROUTER_API_KEY: + logger.error("OPENROUTER_API_KEY not configured") + return None + + if model not in SUPPORTED_MODELS: + logger.error(f"Unsupported AI model: {model}") + return None + + headers = { + "Authorization": f"Bearer {settings.OPENROUTER_API_KEY}", + "Content-Type": "application/json", + "HTTP-Referer": "https://github.com/irt-bank-soal", + "X-Title": "IRT Bank Soal", + } + + payload = { + "model": model, + "messages": [ + { + "role": "user", + "content": prompt, + } + ], + "max_tokens": 2000, + "temperature": 0.7, + } + + timeout = httpx.Timeout(settings.OPENROUTER_TIMEOUT) + + for attempt in range(max_retries): + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post( + OPENROUTER_API_URL, + headers=headers, + json=payload, + ) + + if response.status_code == 200: + data = response.json() + choices = data.get("choices", []) + if choices: + message = choices[0].get("message", {}) + return message.get("content") + logger.warning("No choices in OpenRouter response") + return None + + elif response.status_code == 429: + # Rate limited - wait and retry + logger.warning(f"Rate limited, attempt {attempt + 1}/{max_retries}") + if attempt < max_retries - 1: + import asyncio + await asyncio.sleep(2 ** attempt) + continue + return None + + else: + logger.error( + f"OpenRouter API error: {response.status_code} - {response.text}" + ) + return None + + except httpx.TimeoutException: + logger.warning(f"OpenRouter timeout, attempt {attempt + 1}/{max_retries}") + if attempt < max_retries - 1: + continue + return None + + except Exception as e: + logger.error(f"OpenRouter API call failed: {e}") + if attempt < max_retries - 1: + continue + return None + + return None + + +async def generate_question( + basis_item: Item, + target_level: Literal["mudah", 
"sulit"], + ai_model: str = "qwen/qwen-2.5-coder-32b-instruct", +) -> Optional[GeneratedQuestion]: + """ + Generate a new question based on a basis item. + + Args: + basis_item: The basis item (must be sedang level) + target_level: Target difficulty level + ai_model: AI model to use + + Returns: + GeneratedQuestion if successful, None otherwise + """ + # Build prompt + prompt = get_prompt_template( + basis_stem=basis_item.stem, + basis_options=basis_item.options, + basis_correct=basis_item.correct_answer, + basis_explanation=basis_item.explanation, + target_level=target_level, + ) + + # Call OpenRouter API + response_text = await call_openrouter_api(prompt, ai_model) + + if not response_text: + logger.error("No response from OpenRouter API") + return None + + # Parse response + generated = parse_ai_response(response_text) + + if not generated: + logger.error("Failed to parse AI response") + return None + + return generated + + +async def check_cache_reuse( + tryout_id: str, + slot: int, + level: str, + wp_user_id: str, + website_id: int, + db: AsyncSession, +) -> Optional[Item]: + """ + Check if there's a cached item that the user hasn't answered yet. + + Query DB for existing item matching (tryout_id, slot, level). + Check if user already answered this item at this difficulty level. 
+ + Args: + tryout_id: Tryout identifier + slot: Question slot + level: Difficulty level + wp_user_id: WordPress user ID + website_id: Website identifier + db: Database session + + Returns: + Cached item if found and user hasn't answered, None otherwise + """ + # Find existing items at this slot/level + result = await db.execute( + select(Item).where( + and_( + Item.tryout_id == tryout_id, + Item.website_id == website_id, + Item.slot == slot, + Item.level == level, + ) + ) + ) + existing_items = result.scalars().all() + + if not existing_items: + return None + + # Check each item to find one the user hasn't answered + for item in existing_items: + # Check if user has answered this item + answer_result = await db.execute( + select(UserAnswer).where( + and_( + UserAnswer.item_id == item.id, + UserAnswer.wp_user_id == wp_user_id, + ) + ) + ) + user_answer = answer_result.scalar_one_or_none() + + if user_answer is None: + # User hasn't answered this item - can reuse + logger.info( + f"Cache hit for tryout={tryout_id}, slot={slot}, level={level}, " + f"item_id={item.id}, user={wp_user_id}" + ) + return item + + # All items have been answered by this user + logger.info( + f"Cache miss (user answered all) for tryout={tryout_id}, slot={slot}, " + f"level={level}, user={wp_user_id}" + ) + return None + + +async def generate_with_cache_check( + tryout_id: str, + slot: int, + level: Literal["mudah", "sulit"], + wp_user_id: str, + website_id: int, + db: AsyncSession, + ai_model: str = "qwen/qwen-2.5-coder-32b-instruct", +) -> tuple[Optional[Union[Item, GeneratedQuestion]], bool]: + """ + Generate question with cache checking. + + First checks if AI generation is enabled for the tryout. + Then checks for cached items the user hasn't answered. + If cache miss, generates new question via AI. 
+ + Args: + tryout_id: Tryout identifier + slot: Question slot + level: Target difficulty level + wp_user_id: WordPress user ID + website_id: Website identifier + db: Database session + ai_model: AI model to use + + Returns: + Tuple of (item/question or None, is_cached) + """ + # Check if AI generation is enabled for this tryout + tryout_result = await db.execute( + select(Tryout).where( + and_( + Tryout.tryout_id == tryout_id, + Tryout.website_id == website_id, + ) + ) + ) + tryout = tryout_result.scalar_one_or_none() + + if tryout and not tryout.ai_generation_enabled: + logger.info(f"AI generation disabled for tryout={tryout_id}") + # Still check cache even if AI disabled + cached_item = await check_cache_reuse( + tryout_id, slot, level, wp_user_id, website_id, db + ) + if cached_item: + return cached_item, True + return None, False + + # Check cache for reusable item + cached_item = await check_cache_reuse( + tryout_id, slot, level, wp_user_id, website_id, db + ) + + if cached_item: + return cached_item, True + + # Cache miss - need to generate + # Get basis item (sedang level at same slot) + basis_result = await db.execute( + select(Item).where( + and_( + Item.tryout_id == tryout_id, + Item.website_id == website_id, + Item.slot == slot, + Item.level == "sedang", + ) + ).limit(1) + ) + basis_item = basis_result.scalar_one_or_none() + + if not basis_item: + logger.error( + f"No basis item found for tryout={tryout_id}, slot={slot}" + ) + return None, False + + # Generate new question + generated = await generate_question(basis_item, level, ai_model) + + if not generated: + logger.error( + f"Failed to generate question for tryout={tryout_id}, slot={slot}, level={level}" + ) + return None, False + + return generated, False + + +async def save_ai_question( + generated_data: GeneratedQuestion, + tryout_id: str, + website_id: int, + basis_item_id: int, + slot: int, + level: Literal["mudah", "sedang", "sulit"], + ai_model: str, + db: AsyncSession, +) -> Optional[int]: + 
""" + Save AI-generated question to database. + + Args: + generated_data: Generated question data + tryout_id: Tryout identifier + website_id: Website identifier + basis_item_id: Basis item ID + slot: Question slot + level: Difficulty level + ai_model: AI model used + db: Database session + + Returns: + Created item ID or None if failed + """ + try: + new_item = Item( + tryout_id=tryout_id, + website_id=website_id, + slot=slot, + level=level, + stem=generated_data.stem, + options=generated_data.options, + correct_answer=generated_data.correct, + explanation=generated_data.explanation, + generated_by="ai", + ai_model=ai_model, + basis_item_id=basis_item_id, + calibrated=False, + ctt_p=None, + ctt_bobot=None, + ctt_category=None, + irt_b=None, + irt_se=None, + calibration_sample_size=0, + ) + + db.add(new_item) + await db.flush() # Get the ID without committing + + logger.info( + f"Saved AI-generated item: id={new_item.id}, tryout={tryout_id}, " + f"slot={slot}, level={level}, model={ai_model}" + ) + + return new_item.id + + except Exception as e: + logger.error(f"Failed to save AI-generated question: {e}") + return None + + +async def get_ai_stats(db: AsyncSession) -> Dict[str, Any]: + """ + Get AI generation statistics. 
+ + Args: + db: Database session + + Returns: + Statistics dictionary + """ + # Total AI-generated items + total_result = await db.execute( + select(func.count(Item.id)).where(Item.generated_by == "ai") + ) + total_ai_items = total_result.scalar() or 0 + + # Items by model + model_result = await db.execute( + select(Item.ai_model, func.count(Item.id)) + .where(Item.generated_by == "ai") + .where(Item.ai_model.isnot(None)) + .group_by(Item.ai_model) + ) + items_by_model = {row[0]: row[1] for row in model_result.all()} + + # Note: Cache hit rate would need to be tracked separately + # This is a placeholder for now + return { + "total_ai_items": total_ai_items, + "items_by_model": items_by_model, + "cache_hit_rate": 0.0, + "total_cache_hits": 0, + "total_requests": 0, + } + + +def validate_ai_model(model: str) -> bool: + """ + Validate that the AI model is supported. + + Args: + model: AI model identifier + + Returns: + True if model is supported + """ + return model in SUPPORTED_MODELS diff --git a/app/services/cat_selection.py b/app/services/cat_selection.py new file mode 100644 index 0000000..eb52c35 --- /dev/null +++ b/app/services/cat_selection.py @@ -0,0 +1,702 @@ +""" +CAT (Computerized Adaptive Testing) Selection Service. + +Implements adaptive item selection algorithms for IRT-based testing. +Supports three modes: CTT (fixed), IRT (adaptive), and hybrid. 
+""" + +import math +from dataclasses import dataclass +from datetime import datetime +from typing import Literal, Optional + +from sqlalchemy import and_, not_, or_, select, func +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload + +from app.models import Item, Session, Tryout, UserAnswer +from app.services.irt_calibration import ( + calculate_item_information, + estimate_b_from_ctt_p, + estimate_theta_mle, + update_theta_after_response, +) + + +class CATSelectionError(Exception): + """Exception raised for CAT selection errors.""" + pass + + +@dataclass +class NextItemResult: + """Result of next item selection.""" + item: Optional[Item] + selection_method: str # 'fixed', 'adaptive', 'hybrid' + slot: Optional[int] + level: Optional[str] + reason: str # Why this item was selected + + +@dataclass +class TerminationCheck: + """Result of termination condition check.""" + should_terminate: bool + reason: str + items_answered: int + current_se: Optional[float] + max_items: Optional[int] + se_threshold_met: bool + + +# Default SE threshold for termination +DEFAULT_SE_THRESHOLD = 0.5 +# Default max items if not configured +DEFAULT_MAX_ITEMS = 50 + + +async def get_next_item_fixed( + db: AsyncSession, + session_id: str, + tryout_id: str, + website_id: int, + level_filter: Optional[str] = None +) -> NextItemResult: + """ + Get next item in fixed order (CTT mode). + + Returns items in slot order (1, 2, 3, ...). + Filters by level if specified. + Checks if student already answered this item. 
+ + Args: + db: Database session + session_id: Session identifier + tryout_id: Tryout identifier + website_id: Website identifier + level_filter: Optional difficulty level filter ('mudah', 'sedang', 'sulit') + + Returns: + NextItemResult with selected item or None if no more items + """ + # Get session to find current position and answered items + session_query = select(Session).where(Session.session_id == session_id) + session_result = await db.execute(session_query) + session = session_result.scalar_one_or_none() + + if not session: + raise CATSelectionError(f"Session {session_id} not found") + + # Get all item IDs already answered by this user in this session + answered_query = select(UserAnswer.item_id).where( + UserAnswer.session_id == session_id + ) + answered_result = await db.execute(answered_query) + answered_item_ids = [row[0] for row in answered_result.all()] + + # Build query for available items + query = ( + select(Item) + .where( + Item.tryout_id == tryout_id, + Item.website_id == website_id + ) + .order_by(Item.slot, Item.level) + ) + + # Apply level filter if specified + if level_filter: + query = query.where(Item.level == level_filter) + + # Exclude already answered items + if answered_item_ids: + query = query.where(not_(Item.id.in_(answered_item_ids))) + + result = await db.execute(query) + items = result.scalars().all() + + if not items: + return NextItemResult( + item=None, + selection_method="fixed", + slot=None, + level=None, + reason="No more items available" + ) + + # Return first available item (lowest slot) + next_item = items[0] + + return NextItemResult( + item=next_item, + selection_method="fixed", + slot=next_item.slot, + level=next_item.level, + reason=f"Fixed order selection - slot {next_item.slot}" + ) + + +async def get_next_item_adaptive( + db: AsyncSession, + session_id: str, + tryout_id: str, + website_id: int, + ai_generation_enabled: bool = False, + level_filter: Optional[str] = None +) -> NextItemResult: + """ + Get next 
item using adaptive selection (IRT mode). + + Finds item where b ≈ current theta. + Only uses calibrated items (calibrated=True). + Filters: student hasn't answered this item. + Filters: AI-generated items only if AI generation is enabled. + + Args: + db: Database session + session_id: Session identifier + tryout_id: Tryout identifier + website_id: Website identifier + ai_generation_enabled: Whether to include AI-generated items + level_filter: Optional difficulty level filter + + Returns: + NextItemResult with selected item or None if no suitable items + """ + # Get session for current theta + session_query = select(Session).where(Session.session_id == session_id) + session_result = await db.execute(session_query) + session = session_result.scalar_one_or_none() + + if not session: + raise CATSelectionError(f"Session {session_id} not found") + + # Get current theta (default to 0.0 for first item) + current_theta = session.theta if session.theta is not None else 0.0 + + # Get all item IDs already answered by this user in this session + answered_query = select(UserAnswer.item_id).where( + UserAnswer.session_id == session_id + ) + answered_result = await db.execute(answered_query) + answered_item_ids = [row[0] for row in answered_result.all()] + + # Build query for available calibrated items + query = ( + select(Item) + .where( + Item.tryout_id == tryout_id, + Item.website_id == website_id, + Item.calibrated == True # Only calibrated items for IRT + ) + ) + + # Apply level filter if specified + if level_filter: + query = query.where(Item.level == level_filter) + + # Exclude already answered items + if answered_item_ids: + query = query.where(not_(Item.id.in_(answered_item_ids))) + + # Filter AI-generated items if AI generation is disabled + if not ai_generation_enabled: + query = query.where(Item.generated_by == 'manual') + + result = await db.execute(query) + items = result.scalars().all() + + if not items: + return NextItemResult( + item=None, + 
selection_method="adaptive", + slot=None, + level=None, + reason="No calibrated items available" + ) + + # Find item with b closest to current theta + # Also consider item information (prefer items with higher information at current theta) + best_item = None + best_score = float('inf') + + for item in items: + if item.irt_b is None: + # Skip items without b parameter (shouldn't happen with calibrated=True) + continue + + # Calculate distance from theta + b_distance = abs(item.irt_b - current_theta) + + # Calculate item information at current theta + information = calculate_item_information(current_theta, item.irt_b) + + # Score: minimize distance, maximize information + # Use weighted combination: lower score is better + # Add small penalty for lower information + score = b_distance - (0.1 * information) + + if score < best_score: + best_score = score + best_item = item + + if not best_item: + return NextItemResult( + item=None, + selection_method="adaptive", + slot=None, + level=None, + reason="No items with valid IRT parameters available" + ) + + return NextItemResult( + item=best_item, + selection_method="adaptive", + slot=best_item.slot, + level=best_item.level, + reason=f"Adaptive selection - b={best_item.irt_b:.3f} ≈ θ={current_theta:.3f}" + ) + + +async def get_next_item_hybrid( + db: AsyncSession, + session_id: str, + tryout_id: str, + website_id: int, + hybrid_transition_slot: int = 10, + ai_generation_enabled: bool = False, + level_filter: Optional[str] = None +) -> NextItemResult: + """ + Get next item using hybrid selection. + + Uses fixed order for first N items, then switches to adaptive. + Falls back to CTT if no calibrated items available. 
+ + Args: + db: Database session + session_id: Session identifier + tryout_id: Tryout identifier + website_id: Website identifier + hybrid_transition_slot: Slot number to transition from fixed to adaptive + ai_generation_enabled: Whether to include AI-generated items + level_filter: Optional difficulty level filter + + Returns: + NextItemResult with selected item or None if no items available + """ + # Get session to check current position + session_query = select(Session).where(Session.session_id == session_id) + session_result = await db.execute(session_query) + session = session_result.scalar_one_or_none() + + if not session: + raise CATSelectionError(f"Session {session_id} not found") + + # Count answered items to determine current position + count_query = select(func.count(UserAnswer.id)).where( + UserAnswer.session_id == session_id + ) + count_result = await db.execute(count_query) + items_answered = count_result.scalar() or 0 + + # Determine current slot (next slot to fill) + current_slot = items_answered + 1 + + # Check if we're still in fixed phase + if current_slot <= hybrid_transition_slot: + # Use fixed selection for initial items + result = await get_next_item_fixed( + db, session_id, tryout_id, website_id, level_filter + ) + result.selection_method = "hybrid_fixed" + result.reason = f"Hybrid mode (fixed phase) - slot {current_slot}" + return result + + # Try adaptive selection + adaptive_result = await get_next_item_adaptive( + db, session_id, tryout_id, website_id, ai_generation_enabled, level_filter + ) + + if adaptive_result.item is not None: + adaptive_result.selection_method = "hybrid_adaptive" + adaptive_result.reason = f"Hybrid mode (adaptive phase) - {adaptive_result.reason}" + return adaptive_result + + # Fallback to fixed selection if no calibrated items available + fixed_result = await get_next_item_fixed( + db, session_id, tryout_id, website_id, level_filter + ) + fixed_result.selection_method = "hybrid_fallback" + fixed_result.reason = 
f"Hybrid mode (CTT fallback) - {fixed_result.reason}" + return fixed_result + + +async def update_theta( + db: AsyncSession, + session_id: str, + item_id: int, + is_correct: bool +) -> tuple[float, float]: + """ + Update session theta estimate based on response. + + Calls estimate_theta from irt_calibration.py. + Updates session.theta and session.theta_se. + Handles initial theta (uses 0.0 for first item). + Clamps theta to [-3, +3]. + + Args: + db: Database session + session_id: Session identifier + item_id: Item that was answered + is_correct: Whether the answer was correct + + Returns: + Tuple of (theta, theta_se) + """ + return await update_theta_after_response(db, session_id, item_id, is_correct) + + +async def should_terminate( + db: AsyncSession, + session_id: str, + max_items: Optional[int] = None, + se_threshold: float = DEFAULT_SE_THRESHOLD +) -> TerminationCheck: + """ + Check if session should terminate. + + Termination conditions: + - Reached max_items + - Reached SE threshold (theta_se < se_threshold) + - No more items available + + Args: + db: Database session + session_id: Session identifier + max_items: Maximum items allowed (None = no limit) + se_threshold: SE threshold for termination + + Returns: + TerminationCheck with termination status and reason + """ + # Get session + session_query = select(Session).where(Session.session_id == session_id) + session_result = await db.execute(session_query) + session = session_result.scalar_one_or_none() + + if not session: + raise CATSelectionError(f"Session {session_id} not found") + + # Count answered items + count_query = select(func.count(UserAnswer.id)).where( + UserAnswer.session_id == session_id + ) + count_result = await db.execute(count_query) + items_answered = count_result.scalar() or 0 + + # Check max items + max_items_reached = False + if max_items is not None and items_answered >= max_items: + max_items_reached = True + + # Check SE threshold + current_se = session.theta_se + se_threshold_met = 
False + if current_se is not None and current_se < se_threshold: + se_threshold_met = True + + # Check if we have enough items for SE threshold (at least 15 items per PRD) + min_items_for_se = 15 + se_threshold_met = se_threshold_met and items_answered >= min_items_for_se + + # Determine termination + should_term = max_items_reached or se_threshold_met + + # Build reason + reasons = [] + if max_items_reached: + reasons.append(f"max items reached ({items_answered}/{max_items})") + if se_threshold_met: + reasons.append(f"SE threshold met ({current_se:.3f} < {se_threshold})") + + if not reasons: + reasons.append("continuing") + + return TerminationCheck( + should_terminate=should_term, + reason="; ".join(reasons), + items_answered=items_answered, + current_se=current_se, + max_items=max_items, + se_threshold_met=se_threshold_met + ) + + +async def get_next_item( + db: AsyncSession, + session_id: str, + selection_mode: Literal["fixed", "adaptive", "hybrid"] = "fixed", + hybrid_transition_slot: int = 10, + ai_generation_enabled: bool = False, + level_filter: Optional[str] = None +) -> NextItemResult: + """ + Get next item based on selection mode. + + Main entry point for item selection. 
+ + Args: + db: Database session + session_id: Session identifier + selection_mode: Selection mode ('fixed', 'adaptive', 'hybrid') + hybrid_transition_slot: Slot to transition in hybrid mode + ai_generation_enabled: Whether AI generation is enabled + level_filter: Optional difficulty level filter + + Returns: + NextItemResult with selected item + """ + # Get session for tryout info + session_query = select(Session).where(Session.session_id == session_id) + session_result = await db.execute(session_query) + session = session_result.scalar_one_or_none() + + if not session: + raise CATSelectionError(f"Session {session_id} not found") + + tryout_id = session.tryout_id + website_id = session.website_id + + if selection_mode == "fixed": + return await get_next_item_fixed( + db, session_id, tryout_id, website_id, level_filter + ) + elif selection_mode == "adaptive": + return await get_next_item_adaptive( + db, session_id, tryout_id, website_id, ai_generation_enabled, level_filter + ) + elif selection_mode == "hybrid": + return await get_next_item_hybrid( + db, session_id, tryout_id, website_id, + hybrid_transition_slot, ai_generation_enabled, level_filter + ) + else: + raise CATSelectionError(f"Unknown selection mode: {selection_mode}") + + +async def check_user_level_reuse( + db: AsyncSession, + wp_user_id: str, + website_id: int, + tryout_id: str, + slot: int, + level: str +) -> bool: + """ + Check if user has already answered a question at this difficulty level. + + Per PRD FR-5.3: Check if student user_id already answered question + at specific difficulty level. 
+ + Args: + db: Database session + wp_user_id: WordPress user ID + website_id: Website identifier + tryout_id: Tryout identifier + slot: Question slot + level: Difficulty level + + Returns: + True if user has answered at this level, False otherwise + """ + # Check if user has answered any item at this slot/level combination + query = ( + select(func.count(UserAnswer.id)) + .join(Item, UserAnswer.item_id == Item.id) + .where( + UserAnswer.wp_user_id == wp_user_id, + UserAnswer.website_id == website_id, + UserAnswer.tryout_id == tryout_id, + Item.slot == slot, + Item.level == level + ) + ) + + result = await db.execute(query) + count = result.scalar() or 0 + + return count > 0 + + +async def get_available_levels_for_slot( + db: AsyncSession, + tryout_id: str, + website_id: int, + slot: int +) -> list[str]: + """ + Get available difficulty levels for a specific slot. + + Args: + db: Database session + tryout_id: Tryout identifier + website_id: Website identifier + slot: Question slot + + Returns: + List of available levels + """ + query = ( + select(Item.level) + .where( + Item.tryout_id == tryout_id, + Item.website_id == website_id, + Item.slot == slot + ) + .distinct() + ) + + result = await db.execute(query) + levels = [row[0] for row in result.all()] + + return levels + + +# Admin playground functions for testing CAT behavior + +async def simulate_cat_selection( + db: AsyncSession, + tryout_id: str, + website_id: int, + initial_theta: float = 0.0, + selection_mode: Literal["fixed", "adaptive", "hybrid"] = "adaptive", + max_items: int = 15, + se_threshold: float = DEFAULT_SE_THRESHOLD, + hybrid_transition_slot: int = 10 +) -> dict: + """ + Simulate CAT selection for admin testing. + + Returns sequence of selected items with b values and theta progression. 
+ + Args: + db: Database session + tryout_id: Tryout identifier + website_id: Website identifier + initial_theta: Starting theta value + selection_mode: Selection mode to use + max_items: Maximum items to simulate + se_threshold: SE threshold for termination + hybrid_transition_slot: Slot to transition in hybrid mode + + Returns: + Dict with simulation results + """ + # Get all items for this tryout + items_query = ( + select(Item) + .where( + Item.tryout_id == tryout_id, + Item.website_id == website_id + ) + .order_by(Item.slot) + ) + + items_result = await db.execute(items_query) + all_items = list(items_result.scalars().all()) + + if not all_items: + return { + "error": "No items found for this tryout", + "tryout_id": tryout_id, + "website_id": website_id + } + + # Simulate selection + selected_items = [] + current_theta = initial_theta + current_se = 3.0 # Start with high uncertainty + used_item_ids = set() + + for i in range(max_items): + # Get available items + available_items = [item for item in all_items if item.id not in used_item_ids] + + if not available_items: + break + + # Select based on mode + if selection_mode == "adaptive": + # Filter to calibrated items only + calibrated_items = [item for item in available_items if item.calibrated and item.irt_b is not None] + + if not calibrated_items: + # Fallback to any available item + calibrated_items = available_items + + # Find item closest to current theta + best_item = min( + calibrated_items, + key=lambda item: abs((item.irt_b or 0) - current_theta) + ) + elif selection_mode == "fixed": + # Select in slot order + best_item = min(available_items, key=lambda item: item.slot) + else: # hybrid + if i < hybrid_transition_slot: + best_item = min(available_items, key=lambda item: item.slot) + else: + calibrated_items = [item for item in available_items if item.calibrated and item.irt_b is not None] + if calibrated_items: + best_item = min( + calibrated_items, + key=lambda item: abs((item.irt_b or 0) - 
current_theta) + ) + else: + best_item = min(available_items, key=lambda item: item.slot) + + used_item_ids.add(best_item.id) + + # Simulate response (random based on probability) + import random + b = best_item.irt_b or estimate_b_from_ctt_p(best_item.ctt_p) if best_item.ctt_p else 0.0 + p_correct = 1.0 / (1.0 + math.exp(-(current_theta - b))) + is_correct = random.random() < p_correct + + # Update theta (simplified) + responses = [1 if item.get('is_correct', True) else 0 for item in selected_items] + responses.append(1 if is_correct else 0) + b_params = [item['b'] for item in selected_items] + b_params.append(b) + + new_theta, new_se = estimate_theta_mle(responses, b_params, current_theta) + current_theta = new_theta + current_se = new_se + + selected_items.append({ + "slot": best_item.slot, + "level": best_item.level, + "b": b, + "is_correct": is_correct, + "theta_after": current_theta, + "se_after": current_se, + "calibrated": best_item.calibrated + }) + + # Check SE threshold + if current_se < se_threshold and i >= 14: # At least 15 items + break + + return { + "tryout_id": tryout_id, + "website_id": website_id, + "initial_theta": initial_theta, + "selection_mode": selection_mode, + "total_items": len(selected_items), + "final_theta": current_theta, + "final_se": current_se, + "se_threshold_met": current_se < se_threshold, + "items": selected_items + } diff --git a/app/services/config_management.py b/app/services/config_management.py new file mode 100644 index 0000000..9b55f77 --- /dev/null +++ b/app/services/config_management.py @@ -0,0 +1,431 @@ +""" +Configuration Management Service. + +Provides functions to retrieve and update tryout configurations. +Handles configuration changes for scoring, selection, and normalization modes. 
"""Configuration management service.

Retrieves and updates per-tryout configuration (scoring, selection and
normalization modes) together with the running statistics that back
dynamic normalization.
"""

import logging
from typing import Any, Dict, Literal, Optional

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.tryout import Tryout
from app.models.tryout_stats import TryoutStats

logger = logging.getLogger(__name__)

# The only Tryout attributes that update_config() is allowed to modify.
_UPDATABLE_FIELDS = frozenset({
    "name", "description",
    "scoring_mode", "selection_mode", "normalization_mode",
    "min_sample_for_dynamic", "static_rataan", "static_sb",
    "ai_generation_enabled",
    "hybrid_transition_slot",
    "min_calibration_sample", "theta_estimation_method", "fallback_to_ctt_on_error",
})


async def _load_tryout(
    db: AsyncSession, website_id: int, tryout_id: str
) -> Tryout:
    """Fetch the Tryout row; raise ValueError when it does not exist."""
    result = await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    tryout = result.scalar_one_or_none()
    if tryout is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )
    return tryout


async def _load_stats(
    db: AsyncSession, website_id: int, tryout_id: str
) -> Optional[TryoutStats]:
    """Fetch the TryoutStats row for a tryout, or None when absent."""
    result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    return result.scalar_one_or_none()


async def get_config(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Tryout:
    """Return the full configuration row for one tryout.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Tryout model carrying every configuration field

    Raises:
        ValueError: If the tryout does not exist
    """
    return await _load_tryout(db, website_id, tryout_id)


async def update_config(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    config_updates: Dict[str, Any],
) -> Tryout:
    """Apply a partial configuration update to one tryout.

    Only whitelisted fields are applied; unknown fields are skipped with a
    warning so a stale client cannot break the whole update.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        config_updates: Mapping of configuration fields to new values

    Returns:
        Updated Tryout model

    Raises:
        ValueError: If the tryout does not exist
    """
    tryout = await _load_tryout(db, website_id, tryout_id)

    applied = []
    for field, value in config_updates.items():
        if field not in _UPDATABLE_FIELDS:
            logger.warning(f"Skipping invalid config field: {field}")
            continue
        setattr(tryout, field, value)
        applied.append(field)

    if not applied:
        logger.warning(f"No valid config fields to update for tryout {tryout_id}")

    await db.flush()

    logger.info(
        f"Updated config for tryout {tryout_id}, website {website_id}: "
        f"{', '.join(applied)}"
    )
    return tryout


async def toggle_normalization_mode(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
    new_mode: Literal["static", "dynamic", "hybrid"],
) -> Tryout:
    """Switch a tryout's normalization mode, logging readiness hints.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier
        new_mode: New normalization mode ("static", "dynamic", "hybrid")

    Returns:
        Updated Tryout model

    Raises:
        ValueError: If the tryout does not exist or new_mode is invalid
    """
    if new_mode not in ["static", "dynamic", "hybrid"]:
        raise ValueError(
            f"Invalid normalization_mode: {new_mode}. "
            "Must be 'static', 'dynamic', or 'hybrid'"
        )

    tryout = await _load_tryout(db, website_id, tryout_id)
    old_mode = tryout.normalization_mode
    tryout.normalization_mode = new_mode

    stats = await _load_stats(db, website_id, tryout_id)
    participant_count = stats.participant_count if stats else 0
    min_sample = tryout.min_sample_for_dynamic

    if new_mode == "dynamic":
        # Dynamic normalization on a small sample is statistically shaky,
        # so flag it loudly instead of refusing outright.
        if participant_count < min_sample:
            logger.warning(
                f"Switching to dynamic normalization with only {participant_count} "
                f"participants (threshold: {min_sample}). "
                "Dynamic normalization may produce unreliable results."
            )
        else:
            logger.info(
                f"Switching to dynamic normalization with {participant_count} "
                f"participants (threshold: {min_sample}). "
                "Ready for dynamic normalization."
            )
    elif new_mode == "hybrid":
        if participant_count >= min_sample:
            logger.info(
                f"Switching to hybrid normalization with {participant_count} "
                f"participants (threshold: {min_sample}). "
                "Will use dynamic normalization immediately."
            )
        else:
            logger.info(
                f"Switching to hybrid normalization with {participant_count} "
                f"participants (threshold: {min_sample}). "
                f"Will use static normalization until {min_sample} participants reached."
            )

    await db.flush()

    logger.info(
        f"Toggled normalization mode for tryout {tryout_id}, "
        f"website {website_id}: {old_mode} -> {new_mode}"
    )
    return tryout


async def get_normalization_config(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Dict[str, Any]:
    """Summarize normalization state for one tryout.

    Includes the configured mode, static/dynamic parameters, participant
    count and how far the tryout is from the dynamic threshold.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Dictionary with the normalization configuration summary

    Raises:
        ValueError: If the tryout does not exist
    """
    tryout = await get_config(db, website_id, tryout_id)
    stats = await _load_stats(db, website_id, tryout_id)

    participant_count = stats.participant_count if stats else 0
    min_sample = tryout.min_sample_for_dynamic
    threshold_ready = participant_count >= min_sample

    # Hybrid mode resolves to whichever source the sample size allows.
    configured_mode = tryout.normalization_mode
    if configured_mode == "hybrid":
        effective_mode = "dynamic" if threshold_ready else "static"
    else:
        effective_mode = configured_mode

    return {
        "tryout_id": tryout_id,
        "normalization_mode": configured_mode,
        "effective_mode": effective_mode,
        "static_rataan": tryout.static_rataan,
        "static_sb": tryout.static_sb,
        "dynamic_rataan": stats.rataan if stats else None,
        "dynamic_sb": stats.sb if stats else None,
        "participant_count": participant_count,
        "min_sample_for_dynamic": min_sample,
        "threshold_ready": threshold_ready,
        "participants_needed": max(0, min_sample - participant_count),
    }


async def reset_normalization_stats(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> TryoutStats:
    """Zero out the running NM statistics and force static normalization.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        The reset (or newly created) TryoutStats record

    Raises:
        ValueError: If the tryout does not exist
    """
    tryout = await _load_tryout(db, website_id, tryout_id)

    # Dynamic values are meaningless after a reset, so fall back to the
    # static parameters until the sample is rebuilt.
    tryout.normalization_mode = "static"

    stats = await _load_stats(db, website_id, tryout_id)
    if stats is None:
        stats = TryoutStats(
            website_id=website_id,
            tryout_id=tryout_id,
            participant_count=0,
            total_nm_sum=0.0,
            total_nm_sq_sum=0.0,
            rataan=None,
            sb=None,
            min_nm=None,
            max_nm=None,
        )
        db.add(stats)
    else:
        stats.participant_count = 0
        stats.total_nm_sum = 0.0
        stats.total_nm_sq_sum = 0.0
        stats.rataan = None
        stats.sb = None
        stats.min_nm = None
        stats.max_nm = None

    await db.flush()

    logger.info(
        f"Reset normalization stats for tryout {tryout_id}, "
        f"website {website_id}. Normalization mode switched to static."
    )
    return stats


async def get_full_config(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Dict[str, Any]:
    """Return every configuration field plus the current statistics.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Dictionary with the full configuration and stats

    Raises:
        ValueError: If the tryout does not exist
    """
    tryout = await get_config(db, website_id, tryout_id)
    stats = await _load_stats(db, website_id, tryout_id)

    return {
        "tryout_id": tryout.tryout_id,
        "name": tryout.name,
        "description": tryout.description,
        "scoring_mode": tryout.scoring_mode,
        "selection_mode": tryout.selection_mode,
        "normalization_mode": tryout.normalization_mode,
        "min_sample_for_dynamic": tryout.min_sample_for_dynamic,
        "static_rataan": tryout.static_rataan,
        "static_sb": tryout.static_sb,
        "ai_generation_enabled": tryout.ai_generation_enabled,
        "hybrid_transition_slot": tryout.hybrid_transition_slot,
        "min_calibration_sample": tryout.min_calibration_sample,
        "theta_estimation_method": tryout.theta_estimation_method,
        "fallback_to_ctt_on_error": tryout.fallback_to_ctt_on_error,
        "stats": {
            "participant_count": stats.participant_count if stats else 0,
            "rataan": stats.rataan if stats else None,
            "sb": stats.sb if stats else None,
            "min_nm": stats.min_nm if stats else None,
            "max_nm": stats.max_nm if stats else None,
            "last_calculated": stats.last_calculated if stats else None,
        },
        "created_at": tryout.created_at,
        "updated_at": tryout.updated_at,
    }
b/app/services/ctt_scoring.py @@ -0,0 +1,385 @@ +""" +CTT (Classical Test Theory) Scoring Engine. + +Implements exact Excel formulas for: +- p-value (Tingkat Kesukaran): p = Σ Benar / Total Peserta +- Bobot (Weight): Bobot = 1 - p +- NM (Nilai Mentah): NM = (Total_Bobot_Siswa / Total_Bobot_Max) × 1000 +- NN (Nilai Nasional): NN = 500 + 100 × ((NM - Rataan) / SB) + +All formulas match PRD Section 13.1 exactly. +""" + +import math +from datetime import datetime, timezone +from typing import Optional + +from sqlalchemy import func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.models.item import Item +from app.models.tryout_stats import TryoutStats +from app.models.user_answer import UserAnswer + + +def calculate_ctt_p(total_correct: int, total_participants: int) -> float: + """ + Calculate CTT p-value (Tingkat Kesukaran / Difficulty). + + Formula: p = Σ Benar / Total Peserta + + Args: + total_correct: Number of correct answers (Σ Benar) + total_participants: Total number of participants (Total Peserta) + + Returns: + p-value in range [0.0, 1.0] + + Raises: + ValueError: If total_participants is 0 or values are invalid + """ + if total_participants <= 0: + raise ValueError("total_participants must be greater than 0") + if total_correct < 0: + raise ValueError("total_correct cannot be negative") + if total_correct > total_participants: + raise ValueError("total_correct cannot exceed total_participants") + + p = total_correct / total_participants + + # Clamp to valid range [0, 1] + return max(0.0, min(1.0, p)) + + +def calculate_ctt_bobot(p_value: float) -> float: + """ + Calculate CTT bobot (weight) from p-value. 
+ + Formula: Bobot = 1 - p + + Interpretation: + - Easy questions (p > 0.70) have low bobot (< 0.30) + - Difficult questions (p < 0.30) have high bobot (> 0.70) + - Medium questions (0.30 ≤ p ≤ 0.70) have moderate bobot + + Args: + p_value: CTT p-value in range [0.0, 1.0] + + Returns: + bobot (weight) in range [0.0, 1.0] + + Raises: + ValueError: If p_value is outside [0, 1] range + """ + if not 0.0 <= p_value <= 1.0: + raise ValueError(f"p_value must be in range [0, 1], got {p_value}") + + bobot = 1.0 - p_value + + # Clamp to valid range [0, 1] + return max(0.0, min(1.0, bobot)) + + +def calculate_ctt_nm(total_bobot_siswa: float, total_bobot_max: float) -> int: + """ + Calculate CTT NM (Nilai Mentah / Raw Score). + + Formula: NM = (Total_Bobot_Siswa / Total_Bobot_Max) × 1000 + + This is equivalent to Excel's SUMPRODUCT calculation where: + - Total_Bobot_Siswa = Σ(bobot_earned for each correct answer) + - Total_Bobot_Max = Σ(bobot for all questions) + + Args: + total_bobot_siswa: Total weight earned by student + total_bobot_max: Maximum possible weight (sum of all item bobots) + + Returns: + NM (raw score) in range [0, 1000] + + Raises: + ValueError: If total_bobot_max is 0 or values are invalid + """ + if total_bobot_max <= 0: + raise ValueError("total_bobot_max must be greater than 0") + if total_bobot_siswa < 0: + raise ValueError("total_bobot_siswa cannot be negative") + + nm = (total_bobot_siswa / total_bobot_max) * 1000 + + # Round to integer and clamp to valid range [0, 1000] + nm_int = round(nm) + return max(0, min(1000, nm_int)) + + +def calculate_ctt_nn(nm: int, rataan: float, sb: float) -> int: + """ + Calculate CTT NN (Nilai Nasional / Normalized Score). + + Formula: NN = 500 + 100 × ((NM - Rataan) / SB) + + Normalizes scores to mean=500, SD=100 distribution. 
+ + Args: + nm: Nilai Mentah (raw score) in range [0, 1000] + rataan: Mean of NM scores + sb: Standard deviation of NM scores (Simpangan Baku) + + Returns: + NN (normalized score) in range [0, 1000] + + Raises: + ValueError: If nm is out of range or sb is invalid + """ + if not 0 <= nm <= 1000: + raise ValueError(f"nm must be in range [0, 1000], got {nm}") + if sb <= 0: + # If SD is 0 or negative, return default normalized score + # This handles edge case where all scores are identical + return 500 + + # Calculate normalized score + z_score = (nm - rataan) / sb + nn = 500 + 100 * z_score + + # Round to integer and clamp to valid range [0, 1000] + nn_int = round(nn) + return max(0, min(1000, nn_int)) + + +def categorize_difficulty(p_value: float) -> str: + """ + Categorize question difficulty based on CTT p-value. + + Categories per CTT standards (PRD Section 13.2): + - p < 0.30 → Sukar (Sulit) + - 0.30 ≤ p ≤ 0.70 → Sedang + - p > 0.70 → Mudah + + Args: + p_value: CTT p-value in range [0.0, 1.0] + + Returns: + Difficulty category: "mudah", "sedang", or "sulit" + """ + if p_value > 0.70: + return "mudah" + elif p_value >= 0.30: + return "sedang" + else: + return "sulit" + + +async def calculate_ctt_p_for_item( + db: AsyncSession, item_id: int +) -> Optional[float]: + """ + Calculate CTT p-value for a specific item from existing responses. 
+ + Queries all UserAnswer records for the item to calculate: + p = Σ Benar / Total Peserta + + Args: + db: Async database session + item_id: Item ID to calculate p-value for + + Returns: + p-value in range [0.0, 1.0], or None if no responses exist + """ + # Count total responses and correct responses + result = await db.execute( + select( + func.count().label("total"), + func.sum(func.cast(UserAnswer.is_correct, type_=func.INTEGER)).label("correct"), + ).where(UserAnswer.item_id == item_id) + ) + row = result.first() + + if row is None or row.total == 0: + return None + + return calculate_ctt_p(row.correct or 0, row.total) + + +async def update_tryout_stats( + db: AsyncSession, + website_id: int, + tryout_id: str, + nm: int, +) -> TryoutStats: + """ + Incrementally update TryoutStats with new NM score. + + Updates: + - participant_count += 1 + - total_nm_sum += nm + - total_nm_sq_sum += nm² + - Recalculates rataan (mean) and sb (standard deviation) + - Updates min_nm and max_nm if applicable + + Uses Welford's online algorithm for numerically stable variance calculation. 
+ + Args: + db: Async database session + website_id: Website identifier + tryout_id: Tryout identifier + nm: New NM score to add + + Returns: + Updated TryoutStats record + """ + # Get or create TryoutStats + result = await db.execute( + select(TryoutStats).where( + TryoutStats.website_id == website_id, + TryoutStats.tryout_id == tryout_id, + ) + ) + stats = result.scalar_one_or_none() + + if stats is None: + # Create new stats record + stats = TryoutStats( + website_id=website_id, + tryout_id=tryout_id, + participant_count=1, + total_nm_sum=float(nm), + total_nm_sq_sum=float(nm * nm), + rataan=float(nm), + sb=0.0, # SD is 0 for single data point + min_nm=nm, + max_nm=nm, + last_calculated=datetime.now(timezone.utc), + ) + db.add(stats) + else: + # Incrementally update existing stats + stats.participant_count += 1 + stats.total_nm_sum += nm + stats.total_nm_sq_sum += nm * nm + + # Update min/max + if stats.min_nm is None or nm < stats.min_nm: + stats.min_nm = nm + if stats.max_nm is None or nm > stats.max_nm: + stats.max_nm = nm + + # Recalculate mean and SD + n = stats.participant_count + sum_nm = stats.total_nm_sum + sum_nm_sq = stats.total_nm_sq_sum + + # Mean = Σ NM / n + stats.rataan = sum_nm / n + + # Variance = (Σ NM² / n) - (mean)² + # Using population standard deviation + if n > 1: + variance = (sum_nm_sq / n) - (stats.rataan ** 2) + # Clamp variance to non-negative (handles floating point errors) + variance = max(0.0, variance) + stats.sb = math.sqrt(variance) + else: + stats.sb = 0.0 + + stats.last_calculated = datetime.now(timezone.utc) + + await db.flush() + return stats + + +async def get_total_bobot_max( + db: AsyncSession, + website_id: int, + tryout_id: str, + level: str = "sedang", +) -> float: + """ + Calculate total maximum bobot for a tryout. 
+ + Total_Bobot_Max = Σ bobot for all questions in the tryout + + Args: + db: Async database session + website_id: Website identifier + tryout_id: Tryout identifier + level: Difficulty level to filter by (default: "sedang") + + Returns: + Sum of all item bobots + + Raises: + ValueError: If no items found or items have no bobot values + """ + result = await db.execute( + select(func.sum(Item.ctt_bobot)).where( + Item.website_id == website_id, + Item.tryout_id == tryout_id, + Item.level == level, + ) + ) + total_bobot = result.scalar() + + if total_bobot is None or total_bobot == 0: + raise ValueError( + f"No items with bobot found for tryout {tryout_id}, level {level}" + ) + + return float(total_bobot) + + +def convert_ctt_p_to_irt_b(p_value: float) -> float: + """ + Convert CTT p-value to IRT difficulty parameter (b). + + Formula: b ≈ -ln((1-p)/p) + + This provides an initial estimate for IRT calibration. + Maps p ∈ (0, 1) to b ∈ (-∞, +∞), typically [-3, +3]. + + Args: + p_value: CTT p-value in range (0.0, 1.0) + + Returns: + IRT b-parameter estimate + + Raises: + ValueError: If p_value is at boundaries (0 or 1) + """ + if p_value <= 0.0 or p_value >= 1.0: + # Handle edge cases by clamping + if p_value <= 0.0: + return 3.0 # Very difficult + else: + return -3.0 # Very easy + + # b ≈ -ln((1-p)/p) + odds_ratio = (1 - p_value) / p_value + b = -math.log(odds_ratio) + + # Clamp to valid IRT range [-3, +3] + return max(-3.0, min(3.0, b)) + + +def map_theta_to_nn(theta: float) -> int: + """ + Map IRT theta (ability) to NN score for comparison. + + Formula: NN = 500 + (θ / 3) × 500 + + Maps θ ∈ [-3, +3] to NN ∈ [0, 1000]. 
"""
Excel Import/Export Service for Question Migration.

Handles import from standardized Excel format with:
- Row 2: KUNCI (answer key)
- Row 4: TK (tingkat kesukaran p-value)
- Row 5: BOBOT (weight 1-p)
- Rows 6+: Individual question data

Ensures 100% data integrity with comprehensive validation.
"""

import os
from datetime import datetime
from typing import Any, Dict, List, Optional

import openpyxl
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.item import Item
from app.services.ctt_scoring import (
    convert_ctt_p_to_irt_b,
    categorize_difficulty,
)


def validate_excel_structure(file_path: str) -> Dict[str, Any]:
    """
    Validate Excel file structure against the required format.

    Checks:
    - File exists and is valid Excel (.xlsx)
    - Sheet "CONTOH" exists
    - Required rows exist (Row 2 KUNCI, Row 4 TK, Row 5 BOBOT)
    - Question data rows have the required columns

    Args:
        file_path: Path to Excel file

    Returns:
        Dict with:
        - valid: bool - Whether structure is valid
        - errors: List[str] - Validation errors if any
    """
    errors: List[str] = []

    if not os.path.exists(file_path):
        return {"valid": False, "errors": [f"File not found: {file_path}"]}

    if not file_path.lower().endswith('.xlsx'):
        return {"valid": False, "errors": ["File must be .xlsx format"]}

    try:
        wb = openpyxl.load_workbook(file_path, data_only=False)
    except Exception as e:
        return {"valid": False, "errors": [f"Failed to load Excel file: {str(e)}"]}

    if "CONTOH" not in wb.sheetnames:
        return {
            "valid": False,
            "errors": ['Sheet "CONTOH" not found. Available sheets: ' + ", ".join(wb.sheetnames)]
        }

    ws = wb["CONTOH"]

    # Row-count checks. Both of the max_row < 6 messages are kept for
    # backward compatibility with callers matching on error text.
    if ws.max_row < 6:
        errors.append(f"Excel file must have at least 6 rows (found {ws.max_row})")
    if ws.max_row < 2:
        errors.append("Row 2 (KUNCI - answer key) is required")
    if ws.max_row < 4:
        errors.append("Row 4 (TK - p-values) is required")
    if ws.max_row < 5:
        errors.append("Row 5 (BOBOT - weights) is required")
    if ws.max_row < 6:
        errors.append("Question data rows (6+) are required")

    # Minimum columns: slot, level, soal_text, 4 options, correct_answer.
    if ws.max_column < 8:
        errors.append(
            f"Excel file must have at least 8 columns (found {ws.max_column}). "
            "Expected: slot, level, soal_text, options_A, options_B, options_C, options_D, correct_answer"
        )

    # KUNCI row must contain answer values (the header cell itself may
    # literally say "KUNCI").
    if ws.max_row >= 2:
        kunci_row_values = [ws.cell(2, col).value for col in range(4, ws.max_column + 1)]
        if not any(v for v in kunci_row_values if v and v != "KUNCI"):
            errors.append("Row 2 (KUNCI) must contain answer key values")

    # FIX: load the computed-value (data_only) view exactly once — the
    # previous code reloaded the whole workbook for each numeric check.
    if ws.max_row >= 4:
        ws_data = openpyxl.load_workbook(file_path, data_only=True)["CONTOH"]

        # FIX: test isinstance() directly instead of truthiness, so a row
        # consisting entirely of legitimate 0.0 values is not rejected.
        tk_row_values = [ws_data.cell(4, col).value for col in range(4, ws.max_column + 1)]
        if not any(isinstance(v, (int, float)) for v in tk_row_values):
            errors.append("Row 4 (TK) must contain numeric p-values")

        if ws.max_row >= 5:
            bobot_row_values = [ws_data.cell(5, col).value for col in range(4, ws.max_column + 1)]
            if not any(isinstance(v, (int, float)) for v in bobot_row_values):
                errors.append("Row 5 (BOBOT) must contain numeric weight values")

    return {"valid": len(errors) == 0, "errors": errors}


def parse_excel_import(
    file_path: str,
    website_id: int,
    tryout_id: str
) -> Dict[str, Any]:
    """
    Parse Excel file and extract items with full validation.

    Excel structure:
    - Sheet name: "CONTOH"
    - Row 2: KUNCI (answer key) per slot
    - Row 4: TK (tingkat kesukaran p-value) per slot
    - Row 5: BOBOT (weight 1-p) per slot
    - Rows 6+: Individual question data

    Args:
        file_path: Path to Excel file
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Dict with:
        - items: List[Dict] - Parsed items ready for database
        - validation_errors: List[str] - Any validation errors
        - items_count: int - Number of items parsed
    """
    validation = validate_excel_structure(file_path)
    if not validation["valid"]:
        return {
            "items": [],
            "validation_errors": validation["errors"],
            "items_count": 0
        }

    items: List[Dict[str, Any]] = []
    errors: List[str] = []

    try:
        # Formula view for literal cells (answer key, question text) and
        # computed view for the TK/BOBOT numeric rows.
        wb = openpyxl.load_workbook(file_path, data_only=False)
        ws = wb["CONTOH"]
        wb_data = openpyxl.load_workbook(file_path, data_only=True)
        ws_data = wb_data["CONTOH"]

        # Row 2: answer key, one column per slot (column 4 -> slot 1).
        answer_key: Dict[int, str] = {}
        for col in range(4, ws.max_column + 1):
            key_cell = ws.cell(2, col).value
            if key_cell and key_cell != "KUNCI":
                answer_key[col - 3] = str(key_cell).strip().upper()

        # Row 4: p-values. FIX: isinstance() instead of a truthiness test
        # so a legitimate p-value of 0.0 is not silently replaced by the
        # 0.5 default later on.
        p_values: Dict[int, float] = {}
        for col in range(4, ws.max_column + 1):
            slot_num = col - 3
            if slot_num in answer_key:
                p_cell = ws_data.cell(4, col).value
                if isinstance(p_cell, (int, float)):
                    p_values[slot_num] = float(p_cell)

        # Row 5: weights (same zero-safe check as above).
        bobot_values: Dict[int, float] = {}
        for col in range(4, ws.max_column + 1):
            slot_num = col - 3
            if slot_num in answer_key:
                bobot_cell = ws_data.cell(5, col).value
                if isinstance(bobot_cell, (int, float)):
                    bobot_values[slot_num] = float(bobot_cell)

        # Rows 6+: one question per row.
        # Column layout: A=slot, B=level, C=soal_text, D-G=options A-D,
        # H=correct_answer.
        for row_idx in range(6, ws.max_row + 1):
            slot_cell = ws.cell(row_idx, 1).value
            level_cell = ws.cell(row_idx, 2).value
            soal_text_cell = ws.cell(row_idx, 3).value
            option_a = ws.cell(row_idx, 4).value
            option_b = ws.cell(row_idx, 5).value
            option_c = ws.cell(row_idx, 6).value
            option_d = ws.cell(row_idx, 7).value
            correct_cell = ws.cell(row_idx, 8).value

            # Skip completely empty rows.
            if not slot_cell and not soal_text_cell:
                continue

            if not slot_cell:
                errors.append(f"Row {row_idx}: Missing slot value")
                continue

            slot_num = int(slot_cell) if isinstance(slot_cell, (int, float)) else None
            if slot_num is None:
                try:
                    slot_num = int(str(slot_cell).strip())
                except (ValueError, AttributeError):
                    errors.append(f"Row {row_idx}: Invalid slot value: {slot_cell}")
                    continue

            # Level: take the explicit cell, otherwise infer it from the
            # slot's p-value.
            if not level_cell:
                p_val = p_values.get(slot_num, 0.5)
                level_val = categorize_difficulty(p_val)
            else:
                level_val = str(level_cell).strip().lower()
                if level_val not in ["mudah", "sedang", "sulit"]:
                    errors.append(
                        f"Row {row_idx}: Invalid level '{level_cell}'. Must be 'mudah', 'sedang', or 'sulit'"
                    )
                    continue

            if not soal_text_cell:
                errors.append(f"Row {row_idx} (slot {slot_num}): Missing soal_text (question stem)")
                continue

            options: Dict[str, str] = {}
            if option_a:
                options["A"] = str(option_a).strip()
            if option_b:
                options["B"] = str(option_b).strip()
            if option_c:
                options["C"] = str(option_c).strip()
            if option_d:
                options["D"] = str(option_d).strip()

            if len(options) < 4:
                errors.append(
                    f"Row {row_idx} (slot {slot_num}): Missing options. Expected 4 options (A, B, C, D)"
                )
                continue

            # Correct answer: per-row cell wins, Row 2 key is the fallback.
            if not correct_cell:
                correct_ans = answer_key.get(slot_num)
                if not correct_ans:
                    errors.append(
                        f"Row {row_idx} (slot {slot_num}): Missing correct_answer and no answer key found"
                    )
                    continue
            else:
                correct_ans = str(correct_cell).strip().upper()

            if correct_ans not in ["A", "B", "C", "D"]:
                errors.append(
                    f"Row {row_idx} (slot {slot_num}): Invalid correct_answer '{correct_ans}'. Must be A, B, C, or D"
                )
                continue

            # CTT parameters with sane defaults when the header rows lack
            # a value for this slot.
            p_val = p_values.get(slot_num, 0.5)
            bobot_val = bobot_values.get(slot_num, 1.0 - p_val)

            if p_val < 0 or p_val > 1:
                errors.append(
                    f"Slot {slot_num}: Invalid p-value {p_val}. Must be in range [0, 1]"
                )
                continue

            if bobot_val < 0 or bobot_val > 1:
                errors.append(
                    f"Slot {slot_num}: Invalid bobot {bobot_val}. Must be in range [0, 1]"
                )
                continue

            ctt_cat = categorize_difficulty(p_val)
            irt_b = convert_ctt_p_to_irt_b(p_val)

            items.append({
                "tryout_id": tryout_id,
                "website_id": website_id,
                "slot": slot_num,
                "level": level_val,
                "stem": str(soal_text_cell).strip(),
                "options": options,
                "correct_answer": correct_ans,
                "explanation": None,
                "ctt_p": p_val,
                "ctt_bobot": bobot_val,
                "ctt_category": ctt_cat,
                "irt_b": irt_b,
                "irt_se": None,
                "calibrated": False,
                "calibration_sample_size": 0,
                "generated_by": "manual",
                "ai_model": None,
                "basis_item_id": None,
            })

        return {
            "items": items,
            "validation_errors": errors,
            "items_count": len(items)
        }

    except Exception as e:
        return {
            "items": [],
            "validation_errors": [f"Parsing error: {str(e)}"],
            "items_count": 0
        }
+ + Args: + items_list: List of item dictionaries to insert + db: Async SQLAlchemy database session + + Returns: + Dict with: + - inserted_count: int - Number of items inserted + - duplicate_count: int - Number of duplicates skipped + - errors: List[str] - Any errors during insertion + """ + inserted_count = 0 + duplicate_count = 0 + errors: List[str] = [] + + try: + for item_data in items_list: + # Check for duplicate + result = await db.execute( + select(Item).where( + Item.tryout_id == item_data["tryout_id"], + Item.website_id == item_data["website_id"], + Item.slot == item_data["slot"] + ) + ) + existing = result.scalar_one_or_none() + + if existing: + duplicate_count += 1 + continue + + # Create new item + item = Item(**item_data) + db.add(item) + inserted_count += 1 + + # Commit all inserts + await db.commit() + + return { + "inserted_count": inserted_count, + "duplicate_count": duplicate_count, + "errors": errors + } + + except Exception as e: + await db.rollback() + return { + "inserted_count": 0, + "duplicate_count": duplicate_count, + "errors": [f"Insertion failed: {str(e)}"] + } + + +async def export_questions_to_excel( + tryout_id: str, + website_id: int, + db: AsyncSession, + output_path: Optional[str] = None +) -> str: + """ + Export questions to Excel in standardized format. + + Creates Excel workbook with: + - Sheet "CONTOH" + - Row 2: KUNCI (answer key) + - Row 4: TK (p-values) + - Row 5: BOBOT (weights) + - Rows 6+: Question data + + Args: + tryout_id: Tryout identifier + website_id: Website identifier + db: Async SQLAlchemy database session + output_path: Optional output file path. If not provided, generates temp file. 
async def export_questions_to_excel(
    tryout_id: str,
    website_id: int,
    db: AsyncSession,
    output_path: Optional[str] = None
) -> str:
    """
    Export a tryout's questions to an Excel workbook in the standard layout.

    Sheet "CONTOH" layout:
        - Row 1: headers
        - Row 2: KUNCI (answer key, one column per slot)
        - Row 4: TK (p-values)
        - Row 5: BOBOT (weights)
        - Rows 6+: one question per row

    Args:
        tryout_id: Tryout identifier
        website_id: Website identifier
        db: Async SQLAlchemy database session
        output_path: Optional output file path. A timestamped /tmp path is
            generated when omitted.

    Returns:
        Path to exported Excel file

    Raises:
        ValueError: When the tryout has no items
    """
    rows = await db.execute(
        select(Item).filter(
            Item.tryout_id == tryout_id,
            Item.website_id == website_id
        ).order_by(Item.slot)
    )
    items = rows.scalars().all()

    if not items:
        raise ValueError(f"No items found for tryout_id={tryout_id}, website_id={website_id}")

    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = "CONTOH"

    highest_slot = max(entry.slot for entry in items)

    # Row 1: headers; per-slot columns start at column 4.
    sheet.cell(1, 1, "No")
    sheet.cell(1, 2, "Level")
    sheet.cell(1, 3, "Soal")
    for offset in range(highest_slot):
        sheet.cell(1, offset + 4, f"Soal {offset + 1}")

    # Rows 2-5: label column plus blank padding (row 3 is intentionally empty).
    for row_num, label in ((2, "KUNCI"), (3, ""), (4, "TK"), (5, "BOBOT")):
        sheet.cell(row_num, 1, "")
        sheet.cell(row_num, 2, "")
        sheet.cell(row_num, 3, label)

    # Per-slot metadata: slot s maps to column s + 3.
    for entry in items:
        col = entry.slot + 3
        sheet.cell(2, col, entry.correct_answer)
        # NOTE(review): the `or` fallbacks treat a stored 0.0 as missing and
        # substitute the defaults — presumably intentional; confirm against
        # the import format.
        sheet.cell(4, col, entry.ctt_p or 0.5)
        sheet.cell(5, col, entry.ctt_bobot or (1.0 - (entry.ctt_p or 0.5)))

    # Rows 6+: one question per row, options A-D in fixed columns 4-7.
    for row_num, entry in enumerate(items, start=6):
        sheet.cell(row_num, 1, entry.slot)
        sheet.cell(row_num, 2, entry.level)
        sheet.cell(row_num, 3, entry.stem)
        choices = entry.options or {}
        sheet.cell(row_num, 4, choices.get("A", ""))
        sheet.cell(row_num, 5, choices.get("B", ""))
        sheet.cell(row_num, 6, choices.get("C", ""))
        sheet.cell(row_num, 7, choices.get("D", ""))
        sheet.cell(row_num, 8, entry.correct_answer)

    if output_path is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = f"/tmp/tryout_{tryout_id}_export_{timestamp}.xlsx"

    wb.save(output_path)
    return output_path
def calculate_fisher_information(theta: float, b: float) -> float:
    """
    Fisher information for the 1PL model at ability `theta`.

    I(θ) = P(θ)·(1 − P(θ)); maximal (0.25) when θ equals the item
    difficulty b.

    Args:
        theta: Student ability estimate
        b: Item difficulty parameter

    Returns:
        Fisher information value
    """
    prob = calculate_probability(theta, b)
    return prob * (1 - prob)


def calculate_probability(theta: float, b: float) -> float:
    """
    1PL (Rasch) probability of a correct response.

    P(θ) = 1 / (1 + e^−(θ−b)), with the exponent clipped to ±30 for
    numerical stability.

    Args:
        theta: Student ability estimate
        b: Item difficulty parameter

    Returns:
        Probability of correct response [0, 1]
    """
    z = theta - b
    # Clip exponent to avoid overflow in math.exp
    if z > 30:
        z = 30
    elif z < -30:
        z = -30
    return 1.0 / (1.0 + math.exp(-z))
def estimate_theta_mle(
    responses: list[int],
    b_params: list[float],
    initial_theta: float = 0.0
) -> tuple[float, float]:
    """
    Maximum-likelihood estimate of student ability under the 1PL model.

    Args:
        responses: Binary responses [0, 1, 1, 0, ...]
        b_params: Item difficulty parameters, index-aligned with responses
        initial_theta: Fallback theta used if the optimizer fails (default 0.0)

    Returns:
        Tuple of (theta clamped to [-3, 3], standard error capped at 3.0)

    Raises:
        IRTCalibrationError: If responses and b_params differ in length
    """
    scored = np.asarray(responses, dtype=float)
    difficulties = np.asarray(b_params, dtype=float)

    # Degenerate input: neutral ability, maximal uncertainty.
    if len(scored) == 0 or len(difficulties) == 0:
        return 0.0, 3.0

    if len(scored) != len(difficulties):
        raise IRTCalibrationError("responses and b_params must have same length")

    total = len(scored)
    n_correct = np.sum(scored)

    # Perfect / zero scores have no finite MLE; return the clamped extremes.
    if n_correct == total:
        return 3.0, 1.5
    if n_correct == 0:
        return -3.0, 1.5

    def objective(theta: float) -> float:
        # Negative log-likelihood of the response pattern under 1PL,
        # with exponent clipping and log-argument clamping for stability.
        z = np.clip(theta - difficulties, -30, 30)
        prob = np.clip(1.0 / (1.0 + np.exp(-z)), 1e-10, 1 - 1e-10)
        return -np.sum(scored * np.log(prob) + (1 - scored) * np.log(1 - prob))

    fit = minimize_scalar(objective, bounds=(-3, 3), method='bounded')
    theta = float(fit.x) if fit.success else initial_theta

    # Standard error from total Fisher information at the estimate
    # (same rule as calculate_theta_se, inlined): SE = 1/sqrt(Σ p(1−p)),
    # capped at 3.0.
    total_info = 0.0
    for difficulty in difficulties:
        z = max(-30, min(30, theta - difficulty))
        p = 1.0 / (1.0 + math.exp(-z))
        total_info += p * (1 - p)
    if total_info <= 0:
        se = 3.0
    else:
        se = min(1.0 / math.sqrt(total_info), 3.0)

    return max(-3.0, min(3.0, theta)), se
def calculate_theta_se(theta: float, b_params: list[float]) -> float:
    """
    Standard error of a theta estimate from total Fisher information.

    SE = 1 / sqrt(Σ_j P_j(θ)·(1 − P_j(θ))), capped at 3.0. An empty item
    set or zero total information also yields the 3.0 cap (maximal
    uncertainty).

    Args:
        theta: Current theta estimate
        b_params: List of item difficulty parameters

    Returns:
        Standard error of theta estimate (at most 3.0)
    """
    if not b_params:
        return 3.0

    information = 0.0
    for difficulty in b_params:
        prob = calculate_probability(theta, difficulty)
        information += prob * (1 - prob)

    if information <= 0:
        return 3.0
    return min(1.0 / math.sqrt(information), 3.0)


def estimate_b_from_ctt_p(ctt_p: float) -> float:
    """
    Map a CTT difficulty (proportion correct) to a 1PL b parameter.

    Uses b ≈ −ln((1−p)/p); p is clamped into [0.01, 0.99] before the log
    and the result into [−3, 3]. None maps to neutral difficulty 0.0;
    p ≥ 1 maps to −3 (very easy) and p ≤ 0 to +3 (very hard).

    Args:
        ctt_p: CTT difficulty (proportion correct) [0, 1], or None

    Returns:
        IRT difficulty parameter b in [−3, +3]
    """
    if ctt_p is None:
        return 0.0

    # Degenerate extremes map straight to the range bounds.
    if ctt_p >= 1.0:
        return -3.0
    if ctt_p <= 0.0:
        return 3.0

    clamped = max(0.01, min(0.99, ctt_p))
    b = -math.log((1 - clamped) / clamped)
    return max(-3.0, min(3.0, b))
async def get_session_responses(
    db: AsyncSession,
    session_id: str
) -> tuple[list[int], list[float]]:
    """
    Collect the response vector and matching difficulty vector for a session.

    Joins UserAnswer rows to their Item so each scored response can be
    paired with the best available difficulty estimate.

    Args:
        db: Database session
        session_id: Session identifier

    Returns:
        Tuple of (responses, b_params), index-aligned per answered item
    """
    stmt = (
        select(UserAnswer, Item)
        .join(Item, UserAnswer.item_id == Item.id)
        .where(UserAnswer.session_id == session_id)
        .order_by(UserAnswer.id)
    )
    rows = (await db.execute(stmt)).all()

    responses: list[int] = []
    b_params: list[float] = []

    for answer, item in rows:
        responses.append(1 if answer.is_correct else 0)
        # Prefer a calibrated IRT difficulty; fall back to the CTT-derived
        # estimate, then to neutral difficulty 0.0 when nothing is known.
        if item.calibrated and item.irt_b is not None:
            difficulty = item.irt_b
        elif item.ctt_p is not None:
            difficulty = estimate_b_from_ctt_p(item.ctt_p)
        else:
            difficulty = 0.0
        b_params.append(difficulty)

    return responses, b_params
async def update_session_theta(
    db: AsyncSession,
    session_id: str,
    force_recalculate: bool = False
) -> tuple[float, float]:
    """
    Re-estimate and persist a session's ability (theta) from all responses.

    Args:
        db: Database session
        session_id: Session identifier
        force_recalculate: Force recalculation even if theta exists
            (recalculation currently always runs; kept for API compatibility)

    Returns:
        Tuple of (theta, theta_se)

    Raises:
        IRTCalibrationError: If the session does not exist
    """
    found = await db.execute(
        select(Session).where(Session.session_id == session_id)
    )
    session = found.scalar_one_or_none()
    if session is None:
        raise IRTCalibrationError(f"Session {session_id} not found")

    responses, b_params = await get_session_responses(db, session_id)

    if not responses:
        # Nothing answered yet: reset to the neutral prior with maximal
        # uncertainty and persist it.
        session.theta = 0.0
        session.theta_se = 3.0
        await db.commit()
        return 0.0, 3.0

    start = session.theta if session.theta is not None else 0.0
    theta, se = estimate_theta_mle(responses, b_params, start)

    session.theta = theta
    session.theta_se = se
    await db.commit()

    return theta, se
async def update_theta_after_response(
    db: AsyncSession,
    session_id: str,
    item_id: int,
    is_correct: bool
) -> tuple[float, float]:
    """
    Update session theta after a single response.

    This is an incremental update for real-time theta tracking.

    Args:
        db: Database session
        session_id: Session identifier
        item_id: Item that was answered
        is_correct: Whether the answer was correct

    Returns:
        Tuple of (theta, theta_se)

    Raises:
        IRTCalibrationError: If the session or item does not exist
    """
    # Get session
    session_result = await db.execute(
        select(Session).where(Session.session_id == session_id)
    )
    session = session_result.scalar_one_or_none()
    if not session:
        raise IRTCalibrationError(f"Session {session_id} not found")

    # Get item and resolve its difficulty parameter
    item_result = await db.execute(select(Item).where(Item.id == item_id))
    item = item_result.scalar_one_or_none()
    if not item:
        raise IRTCalibrationError(f"Item {item_id} not found")

    if item.calibrated and item.irt_b is not None:
        b = item.irt_b
    elif item.ctt_p is not None:
        b = estimate_b_from_ctt_p(item.ctt_p)
    else:
        b = 0.0  # neutral difficulty when nothing is known

    # All responses already persisted for this session
    responses, b_params = await get_session_responses(db, session_id)

    # Bug fix: only append the incoming response when it is NOT already
    # persisted as a UserAnswer row. The original appended unconditionally,
    # double-counting the item whenever the answer had been saved before
    # this function was called.
    persisted = await db.execute(
        select(UserAnswer.id)
        .where(
            UserAnswer.session_id == session_id,
            UserAnswer.item_id == item_id,
        )
        .limit(1)
    )
    if persisted.first() is None:
        responses.append(1 if is_correct else 0)
        b_params.append(b)

    # Estimate theta starting from the current value
    initial_theta = session.theta if session.theta is not None else 0.0
    theta, se = estimate_theta_mle(responses, b_params, initial_theta)

    # Persist the updated estimate
    session.theta = theta
    session.theta_se = se
    await db.commit()

    return theta, se
def nn_to_theta(nn: int) -> float:
    """
    Convert a CTT NN score back to IRT theta.

    Formula: θ = ((NN − 500) / 500) × 3, with NN clamped to [0, 1000] and
    the result clamped to [−3, +3].

    Args:
        nn: NN score [0, 1000]

    Returns:
        IRT theta [−3, +3]
    """
    bounded = max(0, min(1000, nn))
    theta = ((bounded - 500) / 500.0) * 3.0
    return max(-3.0, min(3.0, theta))


def calculate_item_information(theta: float, b: float) -> float:
    """
    Item information function at a given ability level.

    For the 1PL model this equals the Fisher information P(θ)·(1 − P(θ)),
    so information is maximal when θ == b.

    Args:
        theta: Ability level
        b: Item difficulty

    Returns:
        Item information value
    """
    # Inlined 1PL probability with the module's standard ±30 exponent clip.
    z = max(-30, min(30, theta - b))
    prob = 1.0 / (1.0 + math.exp(-z))
    return prob * (1 - prob)
def estimate_b(
    responses_matrix: list[list[int]],
    max_iterations: int = 20,
    convergence_threshold: float = 0.001
) -> tuple[Optional[np.ndarray], Optional[np.ndarray]]:
    """
    Estimate item difficulty parameters via joint MLE for the 1PL IRT model.

    EM-style alternation: hold b fixed and re-estimate each student's theta,
    then hold theta fixed and re-estimate each item's b, until the largest
    change in b drops below `convergence_threshold` or `max_iterations`
    passes.

    Parameters
    ----------
    responses_matrix : list[list[int]]
        0/1 response matrix, rows = students, cols = items.
    max_iterations : int
        Maximum EM iterations (default: 20).
    convergence_threshold : float
        Max |Δb| below which iteration stops (default: 0.001).

    Returns
    -------
    tuple[Optional[np.ndarray], Optional[np.ndarray]]
        (b clamped to [−3, +3], per-item SE from Fisher information);
        SE is None when it cannot be computed.

    Raises
    ------
    IRTCalibrationError
        If the matrix is not 2-dimensional.
    """
    data = np.asarray(responses_matrix, dtype=float)

    # Edge case: empty matrix
    if data.size == 0:
        return np.array([]), None

    if data.ndim != 2:
        raise IRTCalibrationError("responses_matrix must be 2-dimensional")

    n_students, n_items = data.shape
    if n_students == 0 or n_items == 0:
        return np.zeros(n_items), None

    abilities = np.zeros(n_students)
    difficulties = np.zeros(n_items)

    def _bounded_mle(neg_ll) -> float:
        # Optimization bounds (−6, 6) are deliberately wider than the
        # final [−3, 3] clamp (module constant MLE_BOUNDS).
        fit = minimize_scalar(neg_ll, bounds=(-6.0, 6.0), method='bounded')
        return fit.x if fit.success else 0.0

    for step in range(max_iterations):
        previous = difficulties.copy()

        # Re-estimate each student's ability given current difficulties.
        for s in range(n_students):
            row = data[s, :]
            n_correct = np.sum(row)
            if n_correct == n_items:
                abilities[s] = 4.0      # all correct -> edge-case high theta
            elif n_correct == 0:
                abilities[s] = -4.0     # all incorrect -> edge-case low theta
            else:
                def neg_ll_student(t: float, row=row) -> float:
                    z = np.clip(t - difficulties, -30, 30)
                    prob = np.clip(1.0 / (1.0 + np.exp(-z)), 1e-10, 1 - 1e-10)
                    return -np.sum(row * np.log(prob) + (1 - row) * np.log(1 - prob))

                abilities[s] = _bounded_mle(neg_ll_student)

        # Re-estimate each item's difficulty given current abilities.
        for j in range(n_items):
            col = data[:, j]
            n_correct = np.sum(col)
            if n_correct == n_students:
                difficulties[j] = -4.0  # everyone correct -> very easy item
            elif n_correct == 0:
                difficulties[j] = 4.0   # everyone incorrect -> very hard item
            else:
                def neg_ll_item(bj: float, col=col) -> float:
                    z = np.clip(abilities - bj, -30, 30)
                    prob = np.clip(1.0 / (1.0 + np.exp(-z)), 1e-10, 1 - 1e-10)
                    return -np.sum(col * np.log(prob) + (1 - col) * np.log(1 - prob))

                difficulties[j] = _bounded_mle(neg_ll_item)

        if np.max(np.abs(difficulties - previous)) < convergence_threshold:
            logging.getLogger(__name__).debug(
                f"Joint MLE converged at iteration {step + 1}"
            )
            break

    # Clamp b into the reportable range. max(min(..)) order preserved so
    # NaN handling matches the original exactly.
    difficulties = np.array(
        [max(-3.0, min(3.0, float(v))) for v in difficulties]
    )

    return difficulties, _calculate_b_se_batch(difficulties, abilities)


def _calculate_b_se_batch(b_params: np.ndarray, thetas: np.ndarray) -> Optional[np.ndarray]:
    """
    Standard errors for all b parameters from Fisher information.

    For the 1PL model the information about item j is Σ_i P(θ_i)(1 − P(θ_i)),
    and SE_j = 1 / sqrt(I_j). Items with zero information get NaN.

    Parameters
    ----------
    b_params : np.ndarray
        Item difficulty parameters.
    thetas : np.ndarray
        Student ability estimates.

    Returns
    -------
    Optional[np.ndarray]
        Standard errors for each b parameter, or None if calculation fails.
    """
    try:
        errors = np.zeros(len(b_params))
        for j, bj in enumerate(b_params):
            z = np.clip(thetas - bj, -30, 30)
            prob = 1.0 / (1.0 + np.exp(-z))
            info = np.sum(prob * (1 - prob))
            errors[j] = 1.0 / np.sqrt(info) if info > 0 else np.nan
        return errors
    except Exception as exc:
        logging.getLogger(__name__).warning(f"Failed to calculate b SE batch: {exc}")
        return None
async def calibrate_item(
    item_id: int,
    db: AsyncSession,
    min_sample_size: int = CALIBRATION_SAMPLE_THRESHOLD
) -> CalibrationResult:
    """
    Calibrate a single item using IRT 1PL model.

    Fetches all UserAnswers for this item, builds a sessions × items
    response matrix over every session that answered it, estimates the
    b-parameter using joint MLE, and updates the item.

    Parameters
    ----------
    item_id : int
        Item ID to calibrate
    db : AsyncSession
        Database session
    min_sample_size : int
        Minimum sample size for calibration (default: 500)

    Returns
    -------
    CalibrationResult
        Calibration result with status, b-parameter, SE, and sample size
    """
    try:
        # Fetch item
        result = await db.execute(select(Item).where(Item.id == item_id))
        item = result.scalar_one_or_none()

        if not item:
            return CalibrationResult(
                item_id=item_id,
                status=CalibrationStatus.FAILED,
                message=f"Item {item_id} not found"
            )

        # Fetch all scored user answers for this item
        result = await db.execute(
            select(UserAnswer)
            .where(UserAnswer.item_id == item_id)
            .where(UserAnswer.is_correct.isnot(None))
        )
        answers = result.scalars().all()

        sample_size = len(answers)

        if sample_size < min_sample_size:
            # Insufficient data - use CTT p-value for initial b estimate
            if item.ctt_p is not None:
                initial_b = estimate_b_from_ctt_p(item.ctt_p)
                return CalibrationResult(
                    item_id=item_id,
                    status=CalibrationStatus.INSUFFICIENT_DATA,
                    irt_b=initial_b,
                    sample_size=sample_size,
                    message=f"Insufficient data ({sample_size} < {min_sample_size}). "
                            f"Using CTT-based initial estimate."
                )
            return CalibrationResult(
                item_id=item_id,
                status=CalibrationStatus.INSUFFICIENT_DATA,
                sample_size=sample_size,
                message=f"Insufficient data ({sample_size} < {min_sample_size})"
            )

        # Unique sessions that answered this item (first-seen order preserved)
        session_ids = list(dict.fromkeys(a.session_id for a in answers))

        if len(session_ids) < 10:
            return CalibrationResult(
                item_id=item_id,
                status=CalibrationStatus.INSUFFICIENT_DATA,
                sample_size=sample_size,
                message="Not enough unique sessions for calibration"
            )

        # Fetch every scored answer from these sessions for joint calibration
        result = await db.execute(
            select(UserAnswer)
            .where(UserAnswer.session_id.in_(session_ids))
            .where(UserAnswer.is_correct.isnot(None))
        )
        all_answers = result.scalars().all()

        # Build the full response matrix (sessions x items).
        # Perf fix: group answers by session in ONE pass instead of
        # rescanning all_answers once per session (the original was
        # O(sessions * answers)).
        item_ids = sorted(set(a.item_id for a in all_answers))
        item_id_to_idx = {iid: idx for idx, iid in enumerate(item_ids)}

        by_session: dict = {}
        for ans in all_answers:
            by_session.setdefault(ans.session_id, {})[ans.item_id] = (
                1 if ans.is_correct else 0
            )

        responses_matrix = []
        for session_id in session_ids:
            scored = by_session.get(session_id, {})
            # NOTE(review): items a session did not answer are recorded as 0
            # (incorrect) — preserved from the original; confirm intended.
            responses_matrix.append([scored.get(iid, 0) for iid in item_ids])

        # Run joint MLE calibration
        b_params, se_params = estimate_b(responses_matrix)

        if b_params is None or len(b_params) == 0:
            return CalibrationResult(
                item_id=item_id,
                status=CalibrationStatus.FAILED,
                sample_size=sample_size,
                message="MLE estimation failed"
            )

        # Get b and SE for our target item
        target_idx = item_id_to_idx.get(item_id)
        if target_idx is None:
            return CalibrationResult(
                item_id=item_id,
                status=CalibrationStatus.FAILED,
                sample_size=sample_size,
                message="Item not found in response matrix"
            )

        irt_b = float(b_params[target_idx])
        irt_se = float(se_params[target_idx]) if se_params is not None else None

        # Validate result; out-of-range values are clamped, not rejected
        if not (B_MIN <= irt_b <= B_MAX):
            logger.warning(f"b-parameter {irt_b} out of range for item {item_id}")
            irt_b = max(B_MIN, min(B_MAX, irt_b))

        # Update item in database
        item.irt_b = irt_b
        item.irt_se = irt_se
        item.calibration_sample_size = sample_size
        item.calibrated = sample_size >= min_sample_size

        await db.commit()

        return CalibrationResult(
            item_id=item_id,
            status=CalibrationStatus.CONVERGED,
            irt_b=irt_b,
            irt_se=irt_se,
            sample_size=sample_size,
            message=f"Successfully calibrated with {sample_size} responses"
        )

    except Exception as e:
        logger.error(f"Calibration failed for item {item_id}: {e}")
        return CalibrationResult(
            item_id=item_id,
            status=CalibrationStatus.FAILED,
            message=f"Calibration error: {str(e)}"
        )
async def calibrate_all(
    tryout_id: str,
    website_id: int,
    db: AsyncSession,
    min_sample_size: int = CALIBRATION_SAMPLE_THRESHOLD
) -> BatchCalibrationResult:
    """
    Calibrate all items in a tryout using IRT 1PL model.

    Finds all uncalibrated items with sufficient responses,
    runs calibration for each, and logs calibration progress.

    Parameters
    ----------
    tryout_id : str
        Tryout identifier
    website_id : int
        Website identifier
    db : AsyncSession
        Database session
    min_sample_size : int
        Minimum sample size for calibration (default: 500)

    Returns
    -------
    BatchCalibrationResult
        Batch calibration result with status for each item
    """
    results = []

    try:
        # Find all items for this tryout
        result = await db.execute(
            select(Item)
            .where(Item.tryout_id == tryout_id)
            .where(Item.website_id == website_id)
            .order_by(Item.slot)
        )
        items = result.scalars().all()

        total_items = len(items)

        if total_items == 0:
            return BatchCalibrationResult(
                tryout_id=tryout_id,
                website_id=website_id,
                total_items=0,
                calibrated_items=0,
                failed_items=0,
                results=[],
                ready_for_irt=False,
                calibration_percentage=0.0
            )

        # Perf fix: one grouped COUNT query for all items instead of one
        # COUNT query per item (N+1 pattern in the original).
        count_result = await db.execute(
            select(UserAnswer.item_id, func.count(UserAnswer.id))
            .where(UserAnswer.item_id.in_([item.id for item in items]))
            .group_by(UserAnswer.item_id)
        )
        item_response_counts = {iid: cnt for iid, cnt in count_result.all()}

        # Calibrate items with sufficient data; report the rest as-is
        for item in items:
            response_count = item_response_counts.get(item.id, 0)

            if response_count >= min_sample_size and not item.calibrated:
                results.append(await calibrate_item(item.id, db, min_sample_size))
            elif item.calibrated:
                # Already calibrated
                results.append(CalibrationResult(
                    item_id=item.id,
                    status=CalibrationStatus.CONVERGED,
                    irt_b=item.irt_b,
                    irt_se=item.irt_se,
                    sample_size=item.calibration_sample_size,
                    message="Already calibrated"
                ))
            else:
                # Insufficient data
                results.append(CalibrationResult(
                    item_id=item.id,
                    status=CalibrationStatus.INSUFFICIENT_DATA,
                    sample_size=response_count,
                    message=f"Insufficient data ({response_count} < {min_sample_size})"
                ))

        calibrated_items = sum(1 for r in results if r.is_calibrated)
        failed_items = sum(1 for r in results if r.status == CalibrationStatus.FAILED)
        calibration_percentage = calibrated_items / total_items if total_items > 0 else 0.0

        # NOTE(review): TryoutStats is looked up but only logged, never
        # written — preserved from the original; confirm whether an update
        # was intended here.
        try:
            from app.models import TryoutStats
            result = await db.execute(
                select(TryoutStats)
                .where(TryoutStats.tryout_id == tryout_id)
                .where(TryoutStats.website_id == website_id)
            )
            stats = result.scalar_one_or_none()

            if stats:
                logger.info(
                    f"Tryout {tryout_id}: {calibrated_items}/{total_items} items calibrated "
                    f"({calibration_percentage:.1%})"
                )
        except Exception as e:
            logger.warning(f"Could not update TryoutStats: {e}")

        return BatchCalibrationResult(
            tryout_id=tryout_id,
            website_id=website_id,
            total_items=total_items,
            calibrated_items=calibrated_items,
            failed_items=failed_items,
            results=results,
            ready_for_irt=calibration_percentage >= IRT_ROLLOUT_THRESHOLD,
            calibration_percentage=calibration_percentage
        )

    except Exception as e:
        logger.error(f"Batch calibration failed for tryout {tryout_id}: {e}")
        return BatchCalibrationResult(
            tryout_id=tryout_id,
            website_id=website_id,
            total_items=len(results),
            calibrated_items=sum(1 for r in results if r.is_calibrated),
            failed_items=sum(1 for r in results if r.status == CalibrationStatus.FAILED),
            results=results,
            ready_for_irt=False,
            calibration_percentage=0.0
        )
def fallback_to_ctt(reason: str, context: Optional[dict] = None) -> dict:
    """
    Generate fallback response for CTT mode when IRT fails.

    Provides graceful degradation mechanism with logging and a
    scoring-mode recommendation keyed by the failure reason.

    Parameters
    ----------
    reason : str
        Reason for fallback (insufficient_data, convergence_error, etc.)
    context : Optional[dict]
        Additional context (item_id, tryout_id, etc.)

    Returns
    -------
    dict
        Fallback response with:
            - fallback_mode: "ctt"
            - reason: str
            - recommendation: str
            - context: dict
            - timestamp: ISO-8601 UTC timestamp (timezone-aware)
    """
    # Local import: this module's top level only imports `datetime` itself.
    from datetime import timezone

    context = context or {}

    recommendations = {
        "insufficient_data": (
            "Continue collecting response data. "
            f"Need {CALIBRATION_SAMPLE_THRESHOLD}+ responses per item for IRT calibration. "
            "Use CTT scoring until threshold is reached."
        ),
        "convergence_error": (
            "MLE optimization failed to converge. "
            "Check for response patterns (all correct/incorrect). "
            "Use CTT scoring as fallback."
        ),
        "numerical_instability": (
            "Numerical instability detected in MLE calculation. "
            "Verify data quality and response patterns. "
            "Use CTT scoring as fallback."
        ),
        "missing_parameters": (
            "Required IRT parameters not available. "
            "Ensure items are calibrated before using IRT mode. "
            "Use CTT scoring until calibration is complete."
        ),
        "default": (
            "IRT scoring unavailable. "
            "Falling back to CTT scoring mode. "
            "Check logs for details."
        )
    }

    recommendation = recommendations.get(reason, recommendations["default"])

    logger.warning(
        f"IRT fallback to CTT - Reason: {reason}, Context: {context}"
    )

    return {
        "fallback_mode": "ctt",
        "reason": reason,
        "recommendation": recommendation,
        "context": context,
        # Fix: datetime.utcnow() is naive and deprecated since Python 3.12;
        # emit an aware UTC timestamp (adds a "+00:00" offset), consistent
        # with app/services/normalization.py.
        "timestamp": datetime.now(timezone.utc).isoformat()
    }
def validate_irt_parameters(
    theta: Optional[float] = None,
    b: Optional[float] = None,
    se: Optional[float] = None
) -> tuple[bool, list[str]]:
    """
    Validate IRT parameters against PRD constraints.

    Range rules: theta and b must lie in [-3.0, 3.0]; SE must be
    non-negative. An SE at or above 0.5 is still valid but logs a
    low-precision warning.

    Parameters
    ----------
    theta : Optional[float]
        Ability estimate to validate
    b : Optional[float]
        Difficulty parameter to validate
    se : Optional[float]
        Standard error to validate

    Returns
    -------
    tuple[bool, list[str]]
        (is_valid, list of error messages)
    """
    problems: list[str] = []

    # PRD ranges: THETA_MIN/THETA_MAX and B_MIN/B_MAX are all ±3.0.
    if theta is not None and not (-3.0 <= theta <= 3.0):
        problems.append(f"Theta {theta} out of range [-3.0, 3.0]")

    if b is not None and not (-3.0 <= b <= 3.0):
        problems.append(f"b-parameter {b} out of range [-3.0, 3.0]")

    if se is not None:
        if se < 0:
            problems.append(f"Standard error {se} must be non-negative")
        elif se >= 0.5:  # SE_PRECISION_THRESHOLD
            # Low precision is a warning, not a validation failure.
            logging.getLogger(__name__).warning(
                f"Standard error {se} exceeds precision threshold 0.5"
            )

    return not problems, problems
async def get_calibration_status(
    tryout_id: str,
    website_id: int,
    db: AsyncSession
) -> dict:
    """
    Summarize IRT calibration progress for a tryout.

    Parameters
    ----------
    tryout_id : str
        Tryout identifier
    website_id : int
        Website identifier
    db : AsyncSession
        Database session

    Returns
    -------
    dict
        Calibration status including:
            - total_items: int
            - calibrated_items: int
            - calibration_percentage: float (0-100, one decimal)
            - ready_for_irt: bool
            - items: list of per-item status dicts
    """
    rows = await db.execute(
        select(Item)
        .where(Item.tryout_id == tryout_id)
        .where(Item.website_id == website_id)
        .order_by(Item.slot)
    )
    items = rows.scalars().all()

    total = len(items)
    done = sum(1 for entry in items if entry.calibrated)
    fraction = done / total if total > 0 else 0.0

    per_item = [
        {
            "item_id": entry.id,
            "slot": entry.slot,
            "level": entry.level,
            "calibrated": entry.calibrated,
            "irt_b": entry.irt_b,
            "irt_se": entry.irt_se,
            "calibration_sample_size": entry.calibration_sample_size,
        }
        for entry in items
    ]

    return {
        "tryout_id": tryout_id,
        "website_id": website_id,
        "total_items": total,
        "calibrated_items": done,
        "calibration_percentage": round(fraction * 100, 1),
        "ready_for_irt": fraction >= IRT_ROLLOUT_THRESHOLD,
        "items": per_item,
    }
async def calculate_dynamic_stats(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Tuple[Optional[float], Optional[float]]:
    """
    Read the current dynamic normalization stats (rataan, SB) for a tryout.

    Fetches the TryoutStats row for this (tryout_id, website_id) pair and
    returns its stored mean and standard deviation.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Tuple of (rataan, sb), both None if no stats row exists yet
    """
    lookup = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    stats = lookup.scalar_one_or_none()

    return (None, None) if stats is None else (stats.rataan, stats.sb)
+ + Fetches current TryoutStats and incrementally updates it with the new NM: + - Increments participant_count by 1 + - Adds NM to total_nm_sum + - Adds NM² to total_nm_sq_sum + - Recalculates rataan and sb + + Args: + db: Async database session + website_id: Website identifier + tryout_id: Tryout identifier + nm: Nilai Mentah (raw score) to add + + Returns: + Tuple of updated (rataan, sb) + + Raises: + ValueError: If nm is out of valid range [0, 1000] + """ + if not 0 <= nm <= 1000: + raise ValueError(f"nm must be in range [0, 1000], got {nm}") + + result = await db.execute( + select(TryoutStats).where( + TryoutStats.website_id == website_id, + TryoutStats.tryout_id == tryout_id, + ) + ) + stats = result.scalar_one_or_none() + + if stats is None: + # Initialize new stats record + stats = TryoutStats( + website_id=website_id, + tryout_id=tryout_id, + participant_count=1, + total_nm_sum=float(nm), + total_nm_sq_sum=float(nm * nm), + rataan=float(nm), + sb=0.0, # SD is 0 for single data point + min_nm=nm, + max_nm=nm, + last_calculated=datetime.now(timezone.utc), + ) + db.add(stats) + else: + # Incrementally update existing stats + stats.participant_count += 1 + stats.total_nm_sum += nm + stats.total_nm_sq_sum += nm * nm + + # Update min/max + if stats.min_nm is None or nm < stats.min_nm: + stats.min_nm = nm + if stats.max_nm is None or nm > stats.max_nm: + stats.max_nm = nm + + # Recalculate mean and SD + n = stats.participant_count + sum_nm = stats.total_nm_sum + sum_nm_sq = stats.total_nm_sq_sum + + # Mean = Σ NM / n + mean = sum_nm / n + stats.rataan = mean + + # Variance = (Σ NM² / n) - (mean)² + # Using population standard deviation + if n > 1: + variance = (sum_nm_sq / n) - (mean ** 2) + # Clamp variance to non-negative (handles floating point errors) + variance = max(0.0, variance) + stats.sb = math.sqrt(variance) + else: + stats.sb = 0.0 + + stats.last_calculated = datetime.now(timezone.utc) + + await db.flush() + + logger.info( + f"Updated dynamic 
def apply_normalization(
    nm: int,
    rataan: float,
    sb: float,
) -> int:
    """
    Convert a raw score (NM) into a national score (NN).

    Formula: NN = 500 + 100 × ((NM − Rataan) / SB), i.e. the z-score of NM
    rescaled onto a mean-500 / SD-100 distribution, rounded and clamped to
    [0, 1000].

    Args:
        nm: Nilai Mentah (raw score) in range [0, 1000]
        rataan: Mean of NM scores
        sb: Standard deviation of NM scores

    Returns:
        NN (normalized score) in range [0, 1000]

    Raises:
        ValueError: If nm is out of range
    """
    if not 0 <= nm <= 1000:
        raise ValueError(f"nm must be in range [0, 1000], got {nm}")

    if sb <= 0:
        # Degenerate distribution (every NM identical, or bad SB): place
        # everyone at the centre of the target distribution.
        return 500

    scaled = 500 + 100 * ((nm - rataan) / sb)
    return max(0, min(1000, round(scaled)))


async def get_normalization_mode(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Literal["static", "dynamic", "hybrid"]:
    """
    Look up the configured normalization mode for one tryout.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Normalization mode: "static", "dynamic", or "hybrid"

    Raises:
        ValueError: If tryout not found
    """
    lookup = await db.execute(
        select(Tryout).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    tryout = lookup.scalar_one_or_none()
    if tryout is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )
    return tryout.normalization_mode
async def check_threshold_for_dynamic(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> bool:
    """
    Check if participant count meets threshold for dynamic normalization.

    Compares current participant_count with min_sample_for_dynamic from config.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        True if participant_count >= min_sample_for_dynamic, else False
    """
    stats_result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    stats = stats_result.scalar_one_or_none()
    current_participant_count = stats.participant_count if stats else 0

    # Single-column select: scalar_one_or_none() is correct here.
    tryout_result = await db.execute(
        select(Tryout.min_sample_for_dynamic).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    min_sample = tryout_result.scalar_one_or_none()
    if min_sample is None:
        # Default to 100 if not configured (or the tryout row is missing).
        min_sample = 100

    return current_participant_count >= min_sample


async def _fetch_static_params(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Tuple[float, float]:
    """
    Fetch the configured (static_rataan, static_sb) for a tryout.

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Tuple of (static_rataan, static_sb)

    Raises:
        ValueError: If tryout not found
    """
    result = await db.execute(
        select(Tryout.static_rataan, Tryout.static_sb).where(
            Tryout.website_id == website_id,
            Tryout.tryout_id == tryout_id,
        )
    )
    # BUG FIX: the original called Result.scalar_one_or_none(), which on a
    # two-column select returns only the FIRST column as a scalar, so the
    # subsequent `rataan, sb = row` unpacking raised TypeError.
    # one_or_none() returns the full Row (or None).
    row = result.one_or_none()
    if row is None:
        raise ValueError(
            f"Tryout {tryout_id} not found for website {website_id}"
        )
    return row[0], row[1]


async def get_normalization_params(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Tuple[float, float, Literal["static", "dynamic"]]:
    """
    Get normalization parameters (rataan, sb) based on current mode.

    Determines which normalization parameters to use:
    - Static mode: Use config.static_rataan and config.static_sb
    - Dynamic mode: Use calculated rataan and sb from TryoutStats
    - Hybrid mode: Use static until threshold reached, then dynamic
      (falling back to static if dynamic stats are not available)

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Tuple of (rataan, sb, mode_used)

    Raises:
        ValueError: If tryout not found or dynamic stats unavailable
    """
    mode = await get_normalization_mode(db, website_id, tryout_id)

    if mode == "static":
        rataan, sb = await _fetch_static_params(db, website_id, tryout_id)
        return rataan, sb, "static"

    if mode == "dynamic":
        rataan, sb = await calculate_dynamic_stats(db, website_id, tryout_id)
        if rataan is None or sb is None:
            raise ValueError(
                f"Dynamic normalization not available for tryout {tryout_id}. "
                "No stats have been calculated yet."
            )
        if sb == 0:
            logger.warning(
                f"Standard deviation is 0 for tryout {tryout_id}. "
                "All NM scores are identical."
            )
        return rataan, sb, "dynamic"

    # Hybrid: dynamic once the participant threshold is met and stats exist,
    # otherwise static.
    if await check_threshold_for_dynamic(db, website_id, tryout_id):
        rataan, sb = await calculate_dynamic_stats(db, website_id, tryout_id)
        if rataan is not None and sb is not None:
            return rataan, sb, "dynamic"
        # Threshold met but no stats row yet: fall back to static.
    rataan, sb = await _fetch_static_params(db, website_id, tryout_id)
    return rataan, sb, "static"


async def calculate_skewness(
    db: AsyncSession,
    website_id: int,
    tryout_id: str,
) -> Optional[float]:
    """
    Calculate skewness of NM distribution for validation.

    Skewness measures the asymmetry of the probability distribution.
    Values:
    - Skewness ≈ 0: Symmetric distribution
    - Skewness > 0: Right-skewed (tail to the right)
    - Skewness < 0: Left-skewed (tail to the left)

    Formula: Skewness = (n / ((n-1)(n-2))) * Σ((x - mean) / SD)³

    Args:
        db: Async database session
        website_id: Website identifier
        tryout_id: Tryout identifier

    Returns:
        Skewness value, or None if insufficient data / not computable
    """
    result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.website_id == website_id,
            TryoutStats.tryout_id == tryout_id,
        )
    )
    stats = result.scalar_one_or_none()

    if stats is None or stats.participant_count < 3:
        # Skewness needs at least 3 observations.
        return None

    if stats.sb == 0:
        # All values identical: perfectly symmetric by convention.
        return 0.0

    # LIMITATION: the skewness formula needs the individual NM values, but
    # TryoutStats only keeps running aggregates (Σ NM, Σ NM²).  Until the
    # raw scores (or a Σ NM³ accumulator) are stored, the value cannot be
    # computed; report "unknown".
    return None
+ Values: + - Skewness ≈ 0: Symmetric distribution + - Skewness > 0: Right-skewed (tail to the right) + - Skewness < 0: Left-skewed (tail to the left) + + Formula: Skewness = (n / ((n-1)(n-2))) * Σ((x - mean) / SD)³ + + Args: + db: Async database session + website_id: Website identifier + tryout_id: Tryout identifier + + Returns: + Skewness value, or None if insufficient data + """ + result = await db.execute( + select(TryoutStats).where( + TryoutStats.website_id == website_id, + TryoutStats.tryout_id == tryout_id, + ) + ) + stats = result.scalar_one_or_none() + + if stats is None or stats.participant_count < 3: + # Need at least 3 samples for skewness calculation + return None + + n = stats.participant_count + mean = stats.rataan + sd = stats.sb + + if sd == 0: + return 0.0 # All values are identical + + # Calculate skewness + # We need individual NM values, which we don't have in TryoutStats + # For now, return None as we need a different approach + # This would require storing all NM values or calculating on-the-fly + return None + + +async def validate_dynamic_normalization( + db: AsyncSession, + website_id: int, + tryout_id: str, + target_mean: float = 500.0, + target_sd: float = 100.0, + mean_tolerance: float = 5.0, + sd_tolerance: float = 5.0, +) -> Tuple[bool, dict]: + """ + Validate that dynamic normalization produces expected distribution. + + Checks if calculated rataan and sb are close to target values. 
+ + Args: + db: Async database session + website_id: Website identifier + tryout_id: Tryout identifier + target_mean: Target mean (default: 500) + target_sd: Target standard deviation (default: 100) + mean_tolerance: Allowed deviation from target mean (default: 5) + sd_tolerance: Allowed deviation from target SD (default: 5) + + Returns: + Tuple of (is_valid, validation_details) + + validation_details contains: + - participant_count: Number of participants + - current_rataan: Current mean + - current_sb: Current standard deviation + - mean_deviation: Absolute deviation from target mean + - sd_deviation: Absolute deviation from target SD + - mean_within_tolerance: True if mean deviation < mean_tolerance + - sd_within_tolerance: True if SD deviation < sd_tolerance + - warnings: List of warning messages + - suggestions: List of suggestions + """ + # Get current stats + result = await db.execute( + select(TryoutStats).where( + TryoutStats.website_id == website_id, + TryoutStats.tryout_id == tryout_id, + ) + ) + stats = result.scalar_one_or_none() + + if stats is None or stats.rataan is None or stats.sb is None: + return False, { + "participant_count": 0, + "current_rataan": None, + "current_sb": None, + "mean_deviation": None, + "sd_deviation": None, + "mean_within_tolerance": False, + "sd_within_tolerance": False, + "warnings": ["No statistics available for validation"], + "suggestions": ["Wait for more participants to complete sessions"], + } + + # Calculate deviations + mean_deviation = abs(stats.rataan - target_mean) + sd_deviation = abs(stats.sb - target_sd) + + # Check tolerance + mean_within_tolerance = mean_deviation <= mean_tolerance + sd_within_tolerance = sd_deviation <= sd_tolerance + + is_valid = mean_within_tolerance and sd_within_tolerance + + # Generate warnings + warnings = [] + suggestions = [] + + if not mean_within_tolerance: + warnings.append(f"Mean deviation ({mean_deviation:.2f}) exceeds tolerance ({mean_tolerance})") + if stats.rataan > 
target_mean: + suggestions.append("Distribution may be right-skewed - consider checking question difficulty") + else: + suggestions.append("Distribution may be left-skewed - consider checking question difficulty") + + if not sd_within_tolerance: + warnings.append(f"SD deviation ({sd_deviation:.2f}) exceeds tolerance ({sd_tolerance})") + if stats.sb < target_sd: + suggestions.append("SD too low - scores may be too tightly clustered") + else: + suggestions.append("SD too high - scores may have too much variance") + + # Check for skewness + skewness = await calculate_skewness(db, website_id, tryout_id) + if skewness is not None and abs(skewness) > 0.5: + warnings.append(f"Distribution skewness ({skewness:.2f}) > 0.5 - distribution may be asymmetric") + suggestions.append("Consider using static normalization if dynamic normalization is unstable") + + # Check participant count + if stats.participant_count < 100: + suggestions.append(f"Participant count ({stats.participant_count}) below recommended minimum (100)") + + return is_valid, { + "participant_count": stats.participant_count, + "current_rataan": stats.rataan, + "current_sb": stats.sb, + "mean_deviation": mean_deviation, + "sd_deviation": sd_deviation, + "mean_within_tolerance": mean_within_tolerance, + "sd_within_tolerance": sd_within_tolerance, + "warnings": warnings, + "suggestions": suggestions, + } diff --git a/app/services/reporting.py b/app/services/reporting.py new file mode 100644 index 0000000..a54734a --- /dev/null +++ b/app/services/reporting.py @@ -0,0 +1,1449 @@ +""" +Reporting Service for IRT Bank Soal. 

Provides comprehensive reporting with 4 report types:
- Student performance reports (individual + aggregate)
- Item analysis reports (difficulty, discrimination, information functions)
- Calibration status reports (progress tracking, readiness metrics)
- Tryout comparison reports (across dates, across subjects)

Export formats: CSV, Excel (.xlsx), PDF
"""

import io
import math
from datetime import datetime, timezone, timedelta
from typing import Any, Dict, List, Literal, Optional, Union
from dataclasses import dataclass, field
import logging

import pandas as pd
from sqlalchemy import select, func, and_, or_
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload

from app.models.item import Item
from app.models.session import Session
from app.models.tryout import Tryout
from app.models.tryout_stats import TryoutStats
from app.models.user_answer import UserAnswer

logger = logging.getLogger(__name__)


# =============================================================================
# Report Data Classes
# =============================================================================

@dataclass
class StudentPerformanceRecord:
    """Individual student performance record (one completed session)."""
    session_id: str
    wp_user_id: str
    tryout_id: str
    # NM = Nilai Mentah (raw score); NN = Nilai Nasional (normalized score).
    NM: Optional[int]
    NN: Optional[int]
    theta: Optional[float]
    theta_se: Optional[float]
    total_benar: int
    time_spent: int  # Total time in seconds
    start_time: Optional[datetime]
    end_time: Optional[datetime]
    scoring_mode_used: str
    # Normalization parameters that were applied when this session was scored.
    rataan_used: Optional[float]
    sb_used: Optional[float]


@dataclass
class AggregatePerformanceStats:
    """Aggregate statistics for student performance."""
    tryout_id: str
    participant_count: int
    avg_nm: Optional[float]
    std_nm: Optional[float]
    min_nm: Optional[int]
    max_nm: Optional[int]
    median_nm: Optional[float]
    avg_nn: Optional[float]
    std_nn: Optional[float]
    avg_theta: Optional[float]
    pass_rate: float  # Percentage with NN >= 500
    avg_time_spent: float  # Average time in seconds


@dataclass
class StudentPerformanceReport:
    """Complete student performance report."""
    generated_at: datetime
    tryout_id: str
    website_id: int
    # Optional {"start": datetime, "end": datetime} filter that was applied.
    date_range: Optional[Dict[str, datetime]]
    aggregate: AggregatePerformanceStats
    individual_records: List[StudentPerformanceRecord]


@dataclass
class ItemAnalysisRecord:
    """Item analysis record for a single item."""
    item_id: int
    slot: int
    level: str
    # Classical Test Theory statistics.
    ctt_p: Optional[float]
    ctt_bobot: Optional[float]
    ctt_category: Optional[str]
    # 1PL IRT parameters (difficulty b and its standard error).
    irt_b: Optional[float]
    irt_se: Optional[float]
    calibrated: bool
    calibration_sample_size: int
    correctness_rate: float  # Actual correctness from responses
    item_total_correlation: Optional[float]
    information_values: Dict[float, float]  # theta -> information
    optimal_theta_range: str  # e.g., "-1 to 0"


@dataclass
class ItemAnalysisReport:
    """Complete item analysis report."""
    generated_at: datetime
    tryout_id: str
    website_id: int
    total_items: int
    items: List[ItemAnalysisRecord]
    summary: Dict[str, Any]


@dataclass
class CalibrationItemStatus:
    """Calibration status for a single item."""
    item_id: int
    slot: int
    level: str
    sample_size: int
    calibrated: bool
    irt_b: Optional[float]
    irt_se: Optional[float]
    ctt_p: Optional[float]


@dataclass
class CalibrationStatusReport:
    """Complete calibration status report."""
    generated_at: datetime
    tryout_id: str
    website_id: int
    total_items: int
    calibrated_items: int
    calibration_percentage: float
    items_awaiting_calibration: List[CalibrationItemStatus]
    avg_calibration_sample_size: float
    estimated_time_to_90_percent: Optional[str]
    ready_for_irt_rollout: bool
    items: List[CalibrationItemStatus]


@dataclass
class TryoutComparisonRecord:
    """Tryout comparison data point."""
    tryout_id: str
    date: Optional[str]
    subject: Optional[str]
    participant_count: int
    avg_nm: Optional[float]
    avg_nn: Optional[float]
    avg_theta: Optional[float]
    std_nm: Optional[float]
    calibration_percentage: float


@dataclass
class TryoutComparisonReport:
    """Complete tryout comparison report."""
    generated_at: datetime
    comparison_type: Literal["date", "subject"]
    tryouts: List[TryoutComparisonRecord]
    trends: Optional[Dict[str, Any]]
    normalization_impact: Optional[Dict[str, Any]]


# =============================================================================
# Helper Functions
# =============================================================================

def _calculate_item_information(theta: float, b: float) -> float:
    """
    Calculate item information function at given theta for 1PL model.

    I(θ) = P(θ) * (1 - P(θ))
    where P(θ) = 1 / (1 + e^-(θ-b))
    """
    exponent = theta - b
    # Clamp the exponent so math.exp cannot overflow for extreme theta/b.
    exponent = max(-30, min(30, exponent))
    p = 1.0 / (1.0 + math.exp(-exponent))
    return p * (1 - p)


def _calculate_item_total_correlation(
    item_responses: List[int],
    total_scores: List[int]
) -> Optional[float]:
    """
    Calculate item-total correlation (point-biserial correlation).

    Returns None if insufficient data.
    """
    # Fewer than 5 paired observations: correlation is too unstable.
    if len(item_responses) < 5 or len(total_scores) < 5:
        return None

    n = len(item_responses)
    # Mismatched lengths mean the pairing is broken; refuse to compute.
    if n != len(total_scores):
        return None

    # Calculate means
    item_mean = sum(item_responses) / n
    total_mean = sum(total_scores) / n

    # Calculate standard deviations (population variance, divisor n)
    item_var = sum((x - item_mean) ** 2 for x in item_responses) / n
    total_var = sum((x - total_mean) ** 2 for x in total_scores) / n

    # Zero variance on either side: correlation undefined.
    if item_var == 0 or total_var == 0:
        return None

    item_std = math.sqrt(item_var)
    total_std = math.sqrt(total_var)

    # Calculate correlation
    covariance = sum(
        (item_responses[i] - item_mean) * (total_scores[i] - total_mean)
        for i in range(n)
    ) / n

    correlation = covariance / (item_std * total_std)
    return round(correlation, 4)


def _calculate_median(values: List[float]) -> Optional[float]:
    """Calculate median of a list of values; None for an empty list."""
    if not values:
        return None

    sorted_values = sorted(values)
    n = len(sorted_values)

    if n % 2 == 0:
        # Even count: mean of the two middle values.
        return (sorted_values[n // 2 - 1] + sorted_values[n // 2]) / 2
    else:
        return sorted_values[n // 2]


def _calculate_std(values: List[float]) -> Optional[float]:
    """Calculate population standard deviation; None for < 2 values."""
    if not values or len(values) < 2:
        return None

    n = len(values)
    mean = sum(values) / n
    variance = sum((x - mean) ** 2 for x in values) / n
    return math.sqrt(variance)


# =============================================================================
# Report Generation Functions
# =============================================================================

async def generate_student_performance_report(
    tryout_id: str,
    website_id: int,
    db: AsyncSession,
    date_range: Optional[Dict[str, datetime]] = None,
    format_type: Literal["individual", "aggregate", "both"] = "both"
) -> StudentPerformanceReport:
    """
    Generate student performance report.

    Args:
        tryout_id: Tryout identifier
        website_id: Website identifier
        db: Database session
        date_range: Optional date range filter {"start": datetime, "end": datetime}
        format_type: Report format - individual, aggregate, or both

    Returns:
        StudentPerformanceReport with aggregate stats and/or individual records
    """
    # Build query for completed sessions
    query = (
        select(Session)
        .where(
            Session.tryout_id == tryout_id,
            Session.website_id == website_id,
            Session.is_completed == True,
        )
    )

    # Apply date range filter if provided
    if date_range:
        if date_range.get("start"):
            query = query.where(Session.start_time >= date_range["start"])
        if date_range.get("end"):
            query = query.where(Session.start_time <= date_range["end"])

    query = query.order_by(Session.NN.desc().nullslast())

    result = await db.execute(query)
    sessions = result.scalars().all()

    # Get total time spent for each session from user_answers
    individual_records = []
    nm_values = []
    nn_values = []
    theta_values = []
    time_spent_values = []
    pass_count = 0

    for session in sessions:
        # Calculate total time spent from user_answers
        # NOTE(review): this issues one SUM query per session (N+1 pattern);
        # a single grouped query over all session_ids would scale better for
        # large tryouts — confirm before changing.
        time_result = await db.execute(
            select(func.sum(UserAnswer.time_spent)).where(
                UserAnswer.session_id == session.session_id
            )
        )
        total_time = time_result.scalar() or 0

        record = StudentPerformanceRecord(
            session_id=session.session_id,
            wp_user_id=session.wp_user_id,
            tryout_id=session.tryout_id,
            NM=session.NM,
            NN=session.NN,
            theta=session.theta,
            theta_se=session.theta_se,
            total_benar=session.total_benar,
            time_spent=total_time,
            start_time=session.start_time,
            end_time=session.end_time,
            scoring_mode_used=session.scoring_mode_used,
            rataan_used=session.rataan_used,
            sb_used=session.sb_used,
        )
        individual_records.append(record)

        # Collect statistics
        if session.NM is not None:
            nm_values.append(float(session.NM))
        if session.NN is not None:
            nn_values.append(float(session.NN))
            # "Pass" is defined here as NN at or above the distribution centre.
            if session.NN >= 500:
                pass_count += 1
        if session.theta is not None:
            theta_values.append(session.theta)
        time_spent_values.append(total_time)

    # Calculate aggregate statistics
    participant_count = len(sessions)
    pass_rate = (pass_count / participant_count * 100) if participant_count > 0 else 0.0
    avg_time = sum(time_spent_values) / len(time_spent_values) if time_spent_values else 0.0

    aggregate = AggregatePerformanceStats(
        tryout_id=tryout_id,
        participant_count=participant_count,
        avg_nm=sum(nm_values) / len(nm_values) if nm_values else None,
        std_nm=_calculate_std(nm_values),
        min_nm=int(min(nm_values)) if nm_values else None,
        max_nm=int(max(nm_values)) if nm_values else None,
        median_nm=_calculate_median(nm_values),
        avg_nn=sum(nn_values) / len(nn_values) if nn_values else None,
        std_nn=_calculate_std(nn_values),
        avg_theta=sum(theta_values) / len(theta_values) if theta_values else None,
        pass_rate=round(pass_rate, 2),
        avg_time_spent=round(avg_time, 2),
    )

    return StudentPerformanceReport(
        generated_at=datetime.now(timezone.utc),
        tryout_id=tryout_id,
        website_id=website_id,
        date_range=date_range,
        aggregate=aggregate,
        # Individual rows are only included when explicitly requested.
        individual_records=individual_records if format_type in ["individual", "both"] else [],
    )


async def generate_item_analysis_report(
    tryout_id: str,
    website_id: int,
    db: AsyncSession,
    filter_by: Optional[Literal["difficulty", "calibrated", "discrimination"]] = None,
    difficulty_level: Optional[Literal["mudah", "sedang", "sulit"]] = None
) -> ItemAnalysisReport:
    """
    Generate item analysis report.

    Args:
        tryout_id: Tryout identifier
        website_id: Website identifier
        db: Database session
        filter_by: Optional filter - difficulty, calibrated, or discrimination
        difficulty_level: Filter by difficulty level if filter_by is "difficulty"

    Returns:
        ItemAnalysisReport with item difficulty, discrimination, and information
    """
    # Get all items for this tryout
    query = (
        select(Item)
        .where(
            Item.tryout_id == tryout_id,
            Item.website_id == website_id,
        )
        .order_by(Item.slot)
    )

    if filter_by == "difficulty" and difficulty_level:
        query = query.where(Item.level == difficulty_level)
    elif filter_by == "calibrated":
        query = query.where(Item.calibrated == True)

    result = await db.execute(query)
    items = result.scalars().all()

    item_records = []
    # Theta grid at which the information function is sampled.
    theta_levels = [-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0]

    for item in items:
        # Get correctness rate from actual responses
        # NOTE(review): func.cast(col, type_=func.INTEGER) is not the
        # SQLAlchemy cast API — `func.cast` builds a generic SQL function and
        # `func.INTEGER` is a function object, not a type. The intended call
        # is sqlalchemy.cast(UserAnswer.is_correct, Integer); verify this
        # query actually executes against the target database.
        resp_result = await db.execute(
            select(
                func.count().label("total"),
                func.sum(func.cast(UserAnswer.is_correct, type_=func.INTEGER)).label("correct")
            ).where(UserAnswer.item_id == item.id)
        )
        resp_stats = resp_result.first()

        correctness_rate = 0.0
        if resp_stats and resp_stats.total > 0:
            correctness_rate = (resp_stats.correct or 0) / resp_stats.total

        # Calculate item-total correlation
        # Get all responses for this item with session total scores
        # NOTE(review): the "total score" used here is the normalized NN,
        # not the raw score — confirm that is the intended basis for the
        # point-biserial correlation.
        correlation_result = await db.execute(
            select(UserAnswer, Session)
            .join(Session, UserAnswer.session_id == Session.session_id)
            .where(
                UserAnswer.item_id == item.id,
                Session.NN.isnot(None)
            )
        )
        correlation_data = correlation_result.all()

        item_responses = []
        total_scores = []
        for ua, sess in correlation_data:
            item_responses.append(1 if ua.is_correct else 0)
            total_scores.append(sess.NN or 0)

        item_total_corr = _calculate_item_total_correlation(item_responses, total_scores)

        # Calculate information values at different theta levels
        information_values = {}
        if item.irt_b is not None:
            for theta in theta_levels:
                information_values[theta] = round(
                    _calculate_item_information(theta, item.irt_b), 4
                )

        # Determine optimal theta range (where information is highest)
        optimal_theta_range = "N/A"
        if information_values:
            # NOTE(review): max_info_theta is computed but never used; the
            # range below is derived directly from b instead.
            max_info_theta = max(information_values.keys(), key=lambda t: information_values[t])
            # For 1PL model, max information is at theta = b
            if item.irt_b is not None:
                b = item.irt_b
                if b < -1:
                    optimal_theta_range = "-3 to -1"
                elif b < 0:
                    optimal_theta_range = "-1 to 0"
                elif b < 1:
                    optimal_theta_range = "0 to 1"
                else:
                    optimal_theta_range = "1 to 3"

        record = ItemAnalysisRecord(
            item_id=item.id,
            slot=item.slot,
            level=item.level,
            ctt_p=round(item.ctt_p, 4) if item.ctt_p is not None else None,
            ctt_bobot=round(item.ctt_bobot, 4) if item.ctt_bobot is not None else None,
            ctt_category=item.ctt_category,
            irt_b=round(item.irt_b, 4) if item.irt_b is not None else None,
            irt_se=round(item.irt_se, 4) if item.irt_se is not None else None,
            calibrated=item.calibrated,
            calibration_sample_size=item.calibration_sample_size,
            correctness_rate=round(correctness_rate, 4),
            item_total_correlation=item_total_corr,
            information_values=information_values,
            optimal_theta_range=optimal_theta_range,
        )
        item_records.append(record)

    # Apply discrimination filter if requested
    if filter_by == "discrimination":
        # Filter items with high discrimination (correlation > 0.3)
        item_records = [
            r for r in item_records
            if r.item_total_correlation is not None and r.item_total_correlation > 0.3
        ]

    # Calculate summary statistics
    avg_correctness = sum(r.correctness_rate for r in item_records) / len(item_records) if item_records else 0
    calibrated_count = sum(1 for r in item_records if r.calibrated)
    high_discrimination = sum(
        1 for r in item_records
        if r.item_total_correlation is not None and r.item_total_correlation > 0.3
    )

    summary = {
        "total_items": len(item_records),
        "calibrated_items": calibrated_count,
        "calibration_percentage": round(calibrated_count / len(item_records) * 100, 2) if item_records else 0,
        "avg_correctness_rate": round(avg_correctness, 4),
        "high_discrimination_items": high_discrimination,
        "difficulty_distribution": {
            "mudah": sum(1 for r in item_records if r.level == "mudah"),
            "sedang": sum(1 for r in item_records if r.level == "sedang"),
            "sulit": sum(1 for r in item_records if r.level == "sulit"),
        }
    }

    return ItemAnalysisReport(
        generated_at=datetime.now(timezone.utc),
        tryout_id=tryout_id,
        website_id=website_id,
        total_items=len(item_records),
        items=item_records,
        summary=summary,
    )


async def generate_calibration_status_report(
    tryout_id: str,
    website_id: int,
    db: AsyncSession
) -> CalibrationStatusReport:
    """
    Generate calibration status report.

    Args:
        tryout_id: Tryout identifier
        website_id: Website identifier
        db: Database session

    Returns:
        CalibrationStatusReport with calibration progress and readiness
    """
    # Get all items for this tryout
    result = await db.execute(
        select(Item)
        .where(
            Item.tryout_id == tryout_id,
            Item.website_id == website_id,
        )
        .order_by(Item.slot)
    )
    items = result.scalars().all()

    # Get tryout stats for response rate estimation
    stats_result = await db.execute(
        select(TryoutStats).where(
            TryoutStats.tryout_id == tryout_id,
            TryoutStats.website_id == website_id,
        )
    )
    stats = stats_result.scalar_one_or_none()

    # Get tryout config for min_calibration_sample
    tryout_result = await db.execute(
        select(Tryout).where(
            Tryout.tryout_id == tryout_id,
            Tryout.website_id == website_id,
        )
    )
    tryout = tryout_result.scalar_one_or_none()
    # Fall back to 500 responses per item when the tryout row is missing.
    min_sample = tryout.min_calibration_sample if tryout else 500

    item_statuses = []
    items_awaiting = []
    total_sample_size = 0
    calibrated_count = 0

    for item in items:
        status = CalibrationItemStatus(
            item_id=item.id,
            slot=item.slot,
            level=item.level,
            sample_size=item.calibration_sample_size,
            calibrated=item.calibrated,
            irt_b=round(item.irt_b, 4) if item.irt_b is not None else None,
            irt_se=round(item.irt_se, 4) if item.irt_se is not None else None,
            ctt_p=round(item.ctt_p, 4) if item.ctt_p is not None else None,
        )
        item_statuses.append(status)
        total_sample_size += item.calibration_sample_size

        if item.calibrated:
            calibrated_count += 1
        elif item.calibration_sample_size < min_sample:
            items_awaiting.append(status)

    total_items = len(items)
    calibration_percentage = (calibrated_count / total_items * 100) if total_items > 0 else 0
    avg_sample_size = total_sample_size / total_items if total_items > 0 else 0

    # Estimate time to reach 90% calibration
    estimated_time = None
    if stats and calibration_percentage < 90:
        # Calculate response rate (responses per day)
        # NOTE(review): last_calculated is the time of the LAST stats update,
        # not the tryout start, so "days_since_start" (and hence the
        # response-rate estimate) is rough — confirm the intended baseline.
        if stats.last_calculated:
            days_since_start = max(1, (datetime.now(timezone.utc) - stats.last_calculated).days)
            response_rate = stats.participant_count / days_since_start

            if response_rate > 0:
                items_needed = int(total_items * 0.9) - calibrated_count
                responses_needed = items_needed * min_sample
                avg_responses_per_item = avg_sample_size if avg_sample_size > 0 else min_sample / 2

                days_needed = responses_needed / (response_rate * avg_responses_per_item) if avg_responses_per_item > 0 else 0
                estimated_time = f"{int(days_needed)} days"

    # 90% of items calibrated is the IRT rollout readiness threshold.
    ready_for_irt = calibration_percentage >= 90

    return CalibrationStatusReport(
        generated_at=datetime.now(timezone.utc),
        tryout_id=tryout_id,
        website_id=website_id,
        total_items=total_items,
        calibrated_items=calibrated_count,
        calibration_percentage=round(calibration_percentage, 2),
        items_awaiting_calibration=items_awaiting,
        avg_calibration_sample_size=round(avg_sample_size, 2),
        estimated_time_to_90_percent=estimated_time,
        ready_for_irt_rollout=ready_for_irt,
        items=item_statuses,
    )


async def generate_tryout_comparison_report(
    tryout_ids: List[str],
    website_id: int,
    db: AsyncSession,
    group_by: Literal["date", "subject"] = "date",
    date_ranges: Optional[List[Dict[str, datetime]]] = None
) -> TryoutComparisonReport:
    """
    Generate tryout comparison report.

    Args:
        tryout_ids: List of tryout identifiers to compare
        website_id: Website identifier
        db: Database session
        group_by: Group by date or subject
        date_ranges: Optional date ranges for each tryout

    Returns:
        TryoutComparisonReport comparing tryouts
    """
    # NOTE(review): the date_ranges parameter (and the loop index i) are
    # currently unused — confirm whether per-tryout date filtering is still
    # planned or the parameter should be deprecated.
    comparison_records = []
    normalization_impact = {}

    for i, tryout_id in enumerate(tryout_ids):
        # Get tryout stats
        stats_result = await db.execute(
            select(TryoutStats).where(
                TryoutStats.tryout_id == tryout_id,
                TryoutStats.website_id == website_id,
            )
        )
        stats = stats_result.scalar_one_or_none()

        # Get tryout config
        tryout_result = await db.execute(
            select(Tryout).where(
                Tryout.tryout_id == tryout_id,
                Tryout.website_id == website_id,
            )
        )
        tryout = tryout_result.scalar_one_or_none()

        # Get calibration percentage
        # NOTE(review): same invalid-cast concern as in the item analysis
        # report — func.cast(..., type_=func.INTEGER) should likely be
        # sqlalchemy.cast(Item.calibrated, Integer); verify at runtime.
        cal_result = await db.execute(
            select(
                func.count().label("total"),
                func.sum(func.cast(Item.calibrated, type_=func.INTEGER)).label("calibrated")
            ).where(
                Item.tryout_id == tryout_id,
                Item.website_id == website_id,
            )
        )
        cal_stats = cal_result.first()
        cal_percentage = 0.0
        if cal_stats and cal_stats.total > 0:
            cal_percentage = (cal_stats.calibrated or 0) / cal_stats.total * 100

        # Extract date/subject from tryout_id
        # Tryout ID format: "mat_sd_week1", "bahasa_sma_week1"
        date_str = None
        subject = None

        if group_by == "subject":
            # Extract subject from tryout_id (e.g., "mat_sd" -> "Matematika SD")
            parts = tryout_id.split("_")
            if len(parts) >= 2:
                subject = f"{parts[0].upper()} {parts[1].upper()}"
        else:
            # Use tryout creation date or extract from ID
            if tryout:
                date_str = tryout.created_at.strftime("%Y-%m-%d")

        # NOTE(review): avg_nn is approximated as rataan + 500, which is not
        # the mean of the actual NN scores; also `if stats and stats.rataan`
        # treats a legitimate rataan/sb of 0.0 as missing — confirm intent.
        record = TryoutComparisonRecord(
            tryout_id=tryout_id,
            date=date_str,
            subject=subject,
            participant_count=stats.participant_count if stats else 0,
            avg_nm=round(stats.rataan, 2) if stats and stats.rataan else None,
            avg_nn=round(stats.rataan + 500, 2) if stats and stats.rataan else None,
            avg_theta=None,  # Would need to calculate from sessions
            std_nm=round(stats.sb, 2) if stats and stats.sb else None,
            calibration_percentage=round(cal_percentage, 2),
        )
        comparison_records.append(record)

        # Track normalization impact
        if tryout:
            normalization_impact[tryout_id] = {
                "mode": tryout.normalization_mode,
                "static_rataan": tryout.static_rataan,
                "static_sb": tryout.static_sb,
                "dynamic_rataan": stats.rataan if stats else None,
                "dynamic_sb": stats.sb if stats else None,
            }

    # Calculate trends
    trends = None
    if group_by == "date" and len(comparison_records) > 1:
        # Lexicographic sort works because dates are formatted YYYY-MM-DD.
        sorted_records = sorted(
            [r for r in comparison_records if r.date],
            key=lambda x: x.date
        )
        if len(sorted_records) > 1:
            first = sorted_records[0]
            last = sorted_records[-1]
            trends = {
                "nm_trend": "increasing" if (last.avg_nm or 0) > (first.avg_nm or 0) else "decreasing",
                "nm_change": round((last.avg_nm or 0) - (first.avg_nm or 0), 2),
                "calibration_trend": "improving" if last.calibration_percentage > first.calibration_percentage else "stable",
            }

    return TryoutComparisonReport(
        generated_at=datetime.now(timezone.utc),
        comparison_type=group_by,
        tryouts=comparison_records,
        trends=trends,
        normalization_impact=normalization_impact if normalization_impact else None,
    )


# =============================================================================
# Export Functions
# =============================================================================

def export_report_to_csv(report_data: Union[StudentPerformanceReport, ItemAnalysisReport, CalibrationStatusReport, TryoutComparisonReport], filename: str) -> str:
    """
    Export report data to CSV format.
def export_report_to_csv(
    report_data: Union[StudentPerformanceReport, ItemAnalysisReport, CalibrationStatusReport, TryoutComparisonReport],
    filename: str,
) -> str:
    """
    Export report data to CSV format.

    Args:
        report_data: Report data object
        filename: Base filename (without extension)

    Returns:
        Full path to generated CSV file

    Raises:
        ValueError: If the report type is not supported
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Use the caller-supplied base filename; previously the `filename`
    # parameter was ignored and a hard-coded prefix was written instead.
    full_filename = f"{filename}_{timestamp}.csv"

    # Convert report to DataFrame based on type
    if isinstance(report_data, StudentPerformanceReport):
        if report_data.individual_records:
            # One row per participant session
            records = [
                {
                    "session_id": r.session_id,
                    "wp_user_id": r.wp_user_id,
                    "tryout_id": r.tryout_id,
                    "NM": r.NM,
                    "NN": r.NN,
                    "theta": r.theta,
                    "theta_se": r.theta_se,
                    "total_benar": r.total_benar,
                    "time_spent_seconds": r.time_spent,
                    "start_time": r.start_time.isoformat() if r.start_time else None,
                    "end_time": r.end_time.isoformat() if r.end_time else None,
                    "scoring_mode": r.scoring_mode_used,
                }
                for r in report_data.individual_records
            ]
            df = pd.DataFrame(records)
        else:
            # Aggregate-only export (single summary row)
            df = pd.DataFrame([{
                "tryout_id": report_data.aggregate.tryout_id,
                "participant_count": report_data.aggregate.participant_count,
                "avg_nm": report_data.aggregate.avg_nm,
                "std_nm": report_data.aggregate.std_nm,
                "min_nm": report_data.aggregate.min_nm,
                "max_nm": report_data.aggregate.max_nm,
                "median_nm": report_data.aggregate.median_nm,
                "avg_nn": report_data.aggregate.avg_nn,
                "std_nn": report_data.aggregate.std_nn,
                "avg_theta": report_data.aggregate.avg_theta,
                "pass_rate_percent": report_data.aggregate.pass_rate,
                "avg_time_spent_seconds": report_data.aggregate.avg_time_spent,
            }])

    elif isinstance(report_data, ItemAnalysisReport):
        records = [
            {
                "item_id": r.item_id,
                "slot": r.slot,
                "level": r.level,
                "ctt_p": r.ctt_p,
                "ctt_bobot": r.ctt_bobot,
                "ctt_category": r.ctt_category,
                "irt_b": r.irt_b,
                "irt_se": r.irt_se,
                "calibrated": r.calibrated,
                "sample_size": r.calibration_sample_size,
                "correctness_rate": r.correctness_rate,
                "item_total_correlation": r.item_total_correlation,
                "optimal_theta_range": r.optimal_theta_range,
            }
            for r in report_data.items
        ]
        df = pd.DataFrame(records)

    elif isinstance(report_data, CalibrationStatusReport):
        records = [
            {
                "item_id": r.item_id,
                "slot": r.slot,
                "level": r.level,
                "sample_size": r.sample_size,
                "calibrated": r.calibrated,
                "irt_b": r.irt_b,
                "irt_se": r.irt_se,
                "ctt_p": r.ctt_p,
            }
            for r in report_data.items
        ]
        df = pd.DataFrame(records)

    elif isinstance(report_data, TryoutComparisonReport):
        records = [
            {
                "tryout_id": r.tryout_id,
                "date": r.date,
                "subject": r.subject,
                "participant_count": r.participant_count,
                "avg_nm": r.avg_nm,
                "avg_nn": r.avg_nn,
                "avg_theta": r.avg_theta,
                "std_nm": r.std_nm,
                "calibration_percentage": r.calibration_percentage,
            }
            for r in report_data.tryouts
        ]
        df = pd.DataFrame(records)

    else:
        raise ValueError(f"Unsupported report type: {type(report_data)}")

    df.to_csv(full_filename, index=False)
    logger.info(f"Exported report to CSV: {full_filename}")
    return full_filename
def export_report_to_excel(
    report_data: Union[StudentPerformanceReport, ItemAnalysisReport, CalibrationStatusReport, TryoutComparisonReport],
    filename: str,
) -> str:
    """
    Export report data to Excel (.xlsx) format.

    Each report type is written as one or more sheets (summary plus detail
    sheets where applicable).

    Args:
        report_data: Report data object
        filename: Base filename (without extension)

    Returns:
        Full path to generated Excel file
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Use the caller-supplied base filename; previously the `filename`
    # parameter was ignored and a hard-coded prefix was written instead.
    full_filename = f"{filename}_{timestamp}.xlsx"

    with pd.ExcelWriter(full_filename, engine='openpyxl') as writer:
        if isinstance(report_data, StudentPerformanceReport):
            # Summary sheet
            summary_df = pd.DataFrame([{
                "Report Generated": report_data.generated_at.isoformat(),
                "Tryout ID": report_data.tryout_id,
                "Website ID": report_data.website_id,
                "Participant Count": report_data.aggregate.participant_count,
                "Average NM": report_data.aggregate.avg_nm,
                "Std Dev NM": report_data.aggregate.std_nm,
                "Min NM": report_data.aggregate.min_nm,
                "Max NM": report_data.aggregate.max_nm,
                "Median NM": report_data.aggregate.median_nm,
                "Average NN": report_data.aggregate.avg_nn,
                "Std Dev NN": report_data.aggregate.std_nn,
                "Average Theta": report_data.aggregate.avg_theta,
                "Pass Rate (%)": report_data.aggregate.pass_rate,
                "Avg Time (seconds)": report_data.aggregate.avg_time_spent,
            }])
            summary_df.to_excel(writer, sheet_name="Summary", index=False)

            # Individual records sheet
            if report_data.individual_records:
                records_df = pd.DataFrame([
                    {
                        "Session ID": r.session_id,
                        "User ID": r.wp_user_id,
                        "NM": r.NM,
                        "NN": r.NN,
                        "Theta": r.theta,
                        "Theta SE": r.theta_se,
                        "Correct Answers": r.total_benar,
                        "Time (seconds)": r.time_spent,
                        "Start Time": r.start_time.isoformat() if r.start_time else None,
                        "End Time": r.end_time.isoformat() if r.end_time else None,
                        "Scoring Mode": r.scoring_mode_used,
                    }
                    for r in report_data.individual_records
                ])
                records_df.to_excel(writer, sheet_name="Individual Records", index=False)

        elif isinstance(report_data, ItemAnalysisReport):
            # Summary sheet
            summary_df = pd.DataFrame([report_data.summary])
            summary_df.to_excel(writer, sheet_name="Summary", index=False)

            # Items sheet
            items_df = pd.DataFrame([
                {
                    "Item ID": r.item_id,
                    "Slot": r.slot,
                    "Level": r.level,
                    "CTT p": r.ctt_p,
                    "CTT Bobot": r.ctt_bobot,
                    "CTT Category": r.ctt_category,
                    "IRT b": r.irt_b,
                    "IRT SE": r.irt_se,
                    "Calibrated": r.calibrated,
                    "Sample Size": r.calibration_sample_size,
                    "Correctness Rate": r.correctness_rate,
                    "Item-Total Corr": r.item_total_correlation,
                    "Optimal Theta Range": r.optimal_theta_range,
                }
                for r in report_data.items
            ])
            items_df.to_excel(writer, sheet_name="Items", index=False)

            # Information functions sheet (long format: one row per theta point)
            if report_data.items and report_data.items[0].information_values:
                info_records = []
                for r in report_data.items:
                    if r.information_values:
                        for theta, info in r.information_values.items():
                            info_records.append({
                                "Item ID": r.item_id,
                                "Slot": r.slot,
                                "Theta": theta,
                                "Information": info,
                            })
                if info_records:
                    info_df = pd.DataFrame(info_records)
                    info_df.to_excel(writer, sheet_name="Information Functions", index=False)

        elif isinstance(report_data, CalibrationStatusReport):
            # Summary sheet
            summary_df = pd.DataFrame([{
                "Report Generated": report_data.generated_at.isoformat(),
                "Tryout ID": report_data.tryout_id,
                "Total Items": report_data.total_items,
                "Calibrated Items": report_data.calibrated_items,
                "Calibration %": report_data.calibration_percentage,
                "Avg Sample Size": report_data.avg_calibration_sample_size,
                "Est. Time to 90%": report_data.estimated_time_to_90_percent,
                "Ready for IRT": report_data.ready_for_irt_rollout,
            }])
            summary_df.to_excel(writer, sheet_name="Summary", index=False)

            # Items awaiting calibration sheet
            if report_data.items_awaiting_calibration:
                awaiting_df = pd.DataFrame([
                    {
                        "Item ID": r.item_id,
                        "Slot": r.slot,
                        "Level": r.level,
                        "Sample Size": r.sample_size,
                        "Calibrated": r.calibrated,
                        "IRT b": r.irt_b,
                        "CTT p": r.ctt_p,
                    }
                    for r in report_data.items_awaiting_calibration
                ])
                awaiting_df.to_excel(writer, sheet_name="Awaiting Calibration", index=False)

            # All items sheet
            all_items_df = pd.DataFrame([
                {
                    "Item ID": r.item_id,
                    "Slot": r.slot,
                    "Level": r.level,
                    "Sample Size": r.sample_size,
                    "Calibrated": r.calibrated,
                    "IRT b": r.irt_b,
                    "IRT SE": r.irt_se,
                    "CTT p": r.ctt_p,
                }
                for r in report_data.items
            ])
            all_items_df.to_excel(writer, sheet_name="All Items", index=False)

        elif isinstance(report_data, TryoutComparisonReport):
            # Comparison sheet
            comparison_df = pd.DataFrame([
                {
                    "Tryout ID": r.tryout_id,
                    "Date": r.date,
                    "Subject": r.subject,
                    "Participants": r.participant_count,
                    "Avg NM": r.avg_nm,
                    "Avg NN": r.avg_nn,
                    "Avg Theta": r.avg_theta,
                    "Std NM": r.std_nm,
                    "Calibration %": r.calibration_percentage,
                }
                for r in report_data.tryouts
            ])
            comparison_df.to_excel(writer, sheet_name="Comparison", index=False)

            # Trends sheet
            if report_data.trends:
                trends_df = pd.DataFrame([report_data.trends])
                trends_df.to_excel(writer, sheet_name="Trends", index=False)

            # Normalization impact sheet
            if report_data.normalization_impact:
                norm_records = []
                for tryout_id, impact in report_data.normalization_impact.items():
                    norm_records.append({
                        "Tryout ID": tryout_id,
                        "Mode": impact.get("mode"),
                        "Static Rataan": impact.get("static_rataan"),
                        "Static SB": impact.get("static_sb"),
                        "Dynamic Rataan": impact.get("dynamic_rataan"),
                        "Dynamic SB": impact.get("dynamic_sb"),
                    })
                norm_df = pd.DataFrame(norm_records)
                norm_df.to_excel(writer, sheet_name="Normalization Impact", index=False)

    logger.info(f"Exported report to Excel: {full_filename}")
    return full_filename
def export_report_to_pdf(
    report_data: Union[StudentPerformanceReport, ItemAnalysisReport, CalibrationStatusReport, TryoutComparisonReport],
    filename: str,
) -> str:
    """
    Export report data to PDF format with tables and charts.

    Args:
        report_data: Report data object
        filename: Base filename (without extension)

    Returns:
        Full path to generated PDF file
    """
    # Function-scope import keeps reportlab optional for deployments that
    # never export PDFs.
    from reportlab.lib import colors
    from reportlab.lib.pagesizes import letter, A4
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
    from reportlab.lib.enums import TA_CENTER, TA_LEFT

    def _fmt(value, spec=None, suffix=""):
        # Format a possibly-missing numeric value. `is not None` (rather than
        # truthiness) so legitimate zeros (e.g. IRT b = 0.0) are printed
        # instead of collapsing to "N/A".
        if value is None:
            return "N/A"
        return (format(value, spec) if spec else str(value)) + suffix

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Use the caller-supplied base filename; previously the `filename`
    # parameter was ignored and a hard-coded prefix was written instead.
    full_filename = f"{filename}_{timestamp}.pdf"

    doc = SimpleDocTemplate(full_filename, pagesize=A4)
    styles = getSampleStyleSheet()

    # Custom styles
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=16,
        alignment=TA_CENTER,
        spaceAfter=20,
    )
    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading2'],
        fontSize=12,
        spaceAfter=10,
    )

    elements = []

    # Title
    title = "Report"
    if isinstance(report_data, StudentPerformanceReport):
        title = f"Student Performance Report - {report_data.tryout_id}"
    elif isinstance(report_data, ItemAnalysisReport):
        title = f"Item Analysis Report - {report_data.tryout_id}"
    elif isinstance(report_data, CalibrationStatusReport):
        title = f"Calibration Status Report - {report_data.tryout_id}"
    elif isinstance(report_data, TryoutComparisonReport):
        title = "Tryout Comparison Report"

    elements.append(Paragraph(title, title_style))
    elements.append(Paragraph(f"Generated: {report_data.generated_at.strftime('%Y-%m-%d %H:%M:%S UTC')}", styles['Normal']))
    elements.append(Spacer(1, 20))

    # Shared table styles
    summary_table_style = TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), 10),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
        ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
        ('GRID', (0, 0), (-1, -1), 1, colors.black),
    ])

    if isinstance(report_data, StudentPerformanceReport):
        # Summary table
        elements.append(Paragraph("Summary Statistics", heading_style))
        agg = report_data.aggregate
        summary_data = [
            ["Metric", "Value"],
            ["Participant Count", str(agg.participant_count)],
            ["Average NM", _fmt(agg.avg_nm)],
            ["Std Dev NM", _fmt(agg.std_nm)],
            ["Min NM", _fmt(agg.min_nm)],
            ["Max NM", _fmt(agg.max_nm)],
            ["Median NM", _fmt(agg.median_nm)],
            ["Average NN", _fmt(agg.avg_nn)],
            ["Pass Rate", f"{agg.pass_rate}%"],
            # Guard None: avg_time_spent may be missing for empty tryouts
            ["Avg Time (min)", _fmt(agg.avg_time_spent / 60, ".1f") if agg.avg_time_spent is not None else "N/A"],
        ]

        summary_table = Table(summary_data, colWidths=[2*inch, 2*inch])
        summary_table.setStyle(summary_table_style)
        elements.append(summary_table)

        # Individual records (first 20)
        if report_data.individual_records:
            elements.append(Spacer(1, 20))
            elements.append(Paragraph("Individual Records (Top 20)", heading_style))

            records_data = [["User ID", "NM", "NN", "Correct", "Time (min)"]]
            for r in report_data.individual_records[:20]:
                records_data.append([
                    r.wp_user_id[:15] + "..." if len(r.wp_user_id) > 15 else r.wp_user_id,
                    _fmt(r.NM),
                    _fmt(r.NN),
                    str(r.total_benar),
                    _fmt(r.time_spent / 60, ".1f") if r.time_spent is not None else "N/A",
                ])

            records_table = Table(records_data, colWidths=[1.5*inch, 0.8*inch, 0.8*inch, 0.8*inch, 1*inch])
            records_table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, -1), 8),
                ('BOTTOMPADDING', (0, 0), (-1, 0), 8),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ]))
            elements.append(records_table)

    elif isinstance(report_data, ItemAnalysisReport):
        # Summary
        elements.append(Paragraph("Item Analysis Summary", heading_style))
        summary_data = [
            ["Metric", "Value"],
            ["Total Items", str(report_data.summary.get("total_items", 0))],
            ["Calibrated Items", str(report_data.summary.get("calibrated_items", 0))],
            ["Calibration %", f"{report_data.summary.get('calibration_percentage', 0)}%"],
            ["Avg Correctness", f"{report_data.summary.get('avg_correctness_rate', 0):.2%}"],
            ["High Discrimination", str(report_data.summary.get("high_discrimination_items", 0))],
        ]

        summary_table = Table(summary_data, colWidths=[2*inch, 2*inch])
        summary_table.setStyle(summary_table_style)
        elements.append(summary_table)

        # Items table (first 25)
        elements.append(Spacer(1, 20))
        elements.append(Paragraph("Items (First 25)", heading_style))

        items_data = [["Slot", "Level", "CTT p", "IRT b", "Calibrated", "Corr Rate"]]
        for r in report_data.items[:25]:
            items_data.append([
                str(r.slot),
                r.level,
                _fmt(r.ctt_p, ".2f"),
                _fmt(r.irt_b, ".2f"),
                "Yes" if r.calibrated else "No",
                f"{r.correctness_rate:.2%}",
            ])

        items_table = Table(items_data, colWidths=[0.6*inch, 0.8*inch, 0.8*inch, 0.8*inch, 1*inch, 0.9*inch])
        items_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, -1), 8),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 8),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ]))
        elements.append(items_table)

    elif isinstance(report_data, CalibrationStatusReport):
        # Summary
        elements.append(Paragraph("Calibration Status Summary", heading_style))
        summary_data = [
            ["Metric", "Value"],
            ["Total Items", str(report_data.total_items)],
            ["Calibrated Items", str(report_data.calibrated_items)],
            ["Calibration %", f"{report_data.calibration_percentage}%"],
            ["Avg Sample Size", f"{report_data.avg_calibration_sample_size:.0f}"],
            ["Est. Time to 90%", report_data.estimated_time_to_90_percent or "N/A"],
            ["Ready for IRT", "Yes" if report_data.ready_for_irt_rollout else "No"],
        ]

        summary_table = Table(summary_data, colWidths=[2*inch, 2*inch])
        summary_table.setStyle(summary_table_style)
        elements.append(summary_table)

        # Items awaiting calibration
        if report_data.items_awaiting_calibration:
            elements.append(Spacer(1, 20))
            elements.append(Paragraph(f"Items Awaiting Calibration ({len(report_data.items_awaiting_calibration)})", heading_style))

            await_data = [["Slot", "Level", "Sample Size", "CTT p", "IRT b"]]
            for r in report_data.items_awaiting_calibration[:25]:
                await_data.append([
                    str(r.slot),
                    r.level,
                    str(r.sample_size),
                    _fmt(r.ctt_p, ".2f"),
                    _fmt(r.irt_b, ".2f"),
                ])

            await_table = Table(await_data, colWidths=[0.8*inch, 0.8*inch, 1.2*inch, 0.8*inch, 0.8*inch])
            await_table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, -1), 8),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ]))
            elements.append(await_table)

    elif isinstance(report_data, TryoutComparisonReport):
        # Comparison table
        elements.append(Paragraph("Tryout Comparison", heading_style))
        comp_data = [["Tryout ID", "Participants", "Avg NM", "Avg NN", "Calib %"]]
        for r in report_data.tryouts:
            comp_data.append([
                r.tryout_id[:20],
                str(r.participant_count),
                _fmt(r.avg_nm, ".1f"),
                _fmt(r.avg_nn, ".1f"),
                f"{r.calibration_percentage:.1f}%",
            ])

        comp_table = Table(comp_data, colWidths=[1.5*inch, 1*inch, 1*inch, 1*inch, 1*inch])
        comp_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, -1), 9),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ]))
        elements.append(comp_table)

        # Trends
        if report_data.trends:
            elements.append(Spacer(1, 20))
            elements.append(Paragraph("Trends Analysis", heading_style))
            trends_data = [["Metric", "Value"]]
            for key, value in report_data.trends.items():
                trends_data.append([key.replace("_", " ").title(), str(value)])

            trends_table = Table(trends_data, colWidths=[2*inch, 2*inch])
            trends_table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, -1), 9),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ]))
            elements.append(trends_table)

    # Build PDF
    doc.build(elements)
    logger.info(f"Exported report to PDF: {full_filename}")
    return full_filename
field(default_factory=lambda: datetime.now(timezone.utc)) + last_run: Optional[datetime] = None + next_run: Optional[datetime] = None + is_active: bool = True + + +# In-memory store for scheduled reports (in production, use database) +_scheduled_reports: Dict[str, ReportSchedule] = {} + + +def schedule_report( + report_type: Literal["student_performance", "item_analysis", "calibration_status", "tryout_comparison"], + schedule: Literal["daily", "weekly", "monthly"], + tryout_ids: List[str], + website_id: int, + recipients: List[str], + export_format: Literal["csv", "xlsx", "pdf"] = "xlsx" +) -> str: + """ + Schedule a report for automatic generation. + + Args: + report_type: Type of report to generate + schedule: Schedule frequency + tryout_ids: List of tryout IDs for the report + website_id: Website identifier + recipients: List of email addresses to send report to + export_format: Export format for the report + + Returns: + Schedule ID + """ + import uuid + + schedule_id = str(uuid.uuid4()) + + # Calculate next run time + now = datetime.now(timezone.utc) + if schedule == "daily": + next_run = now + timedelta(days=1) + elif schedule == "weekly": + next_run = now + timedelta(weeks=1) + else: # monthly + next_run = now + timedelta(days=30) + + report_schedule = ReportSchedule( + schedule_id=schedule_id, + report_type=report_type, + schedule=schedule, + tryout_ids=tryout_ids, + website_id=website_id, + recipients=recipients, + format=export_format, + next_run=next_run, + ) + + _scheduled_reports[schedule_id] = report_schedule + logger.info(f"Scheduled report {schedule_id}: {report_type} {schedule}") + + return schedule_id + + +def get_scheduled_report(schedule_id: str) -> Optional[ReportSchedule]: + """Get a scheduled report by ID.""" + return _scheduled_reports.get(schedule_id) + + +def list_scheduled_reports(website_id: Optional[int] = None) -> List[ReportSchedule]: + """List all scheduled reports, optionally filtered by website.""" + reports = 
list(_scheduled_reports.values()) + if website_id: + reports = [r for r in reports if r.website_id == website_id] + return reports + + +def cancel_scheduled_report(schedule_id: str) -> bool: + """Cancel a scheduled report.""" + if schedule_id in _scheduled_reports: + del _scheduled_reports[schedule_id] + logger.info(f"Cancelled scheduled report {schedule_id}") + return True + return False + + +# Export public API +__all__ = [ + # Report generation functions + "generate_student_performance_report", + "generate_item_analysis_report", + "generate_calibration_status_report", + "generate_tryout_comparison_report", + # Export functions + "export_report_to_csv", + "export_report_to_excel", + "export_report_to_pdf", + # Report data classes + "StudentPerformanceReport", + "StudentPerformanceRecord", + "AggregatePerformanceStats", + "ItemAnalysisReport", + "ItemAnalysisRecord", + "CalibrationStatusReport", + "CalibrationItemStatus", + "TryoutComparisonReport", + "TryoutComparisonRecord", + # Scheduling + "ReportSchedule", + "schedule_report", + "get_scheduled_report", + "list_scheduled_reports", + "cancel_scheduled_report", +] diff --git a/app/services/wordpress_auth.py b/app/services/wordpress_auth.py new file mode 100644 index 0000000..9e1c75f --- /dev/null +++ b/app/services/wordpress_auth.py @@ -0,0 +1,456 @@ +""" +WordPress Authentication and User Synchronization Service. 
+ +Handles: +- JWT token validation via WordPress REST API +- User synchronization from WordPress to local database +- Multi-site support via website_id isolation +""" + +import logging +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any, Optional + +import httpx +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.config import get_settings +from app.models.user import User +from app.models.website import Website + +logger = logging.getLogger(__name__) +settings = get_settings() + + +# Custom exceptions for WordPress integration +class WordPressAuthError(Exception): + """Base exception for WordPress authentication errors.""" + pass + + +class WordPressTokenInvalidError(WordPressAuthError): + """Raised when WordPress token is invalid or expired.""" + pass + + +class WordPressAPIError(WordPressAuthError): + """Raised when WordPress API is unreachable or returns error.""" + pass + + +class WordPressRateLimitError(WordPressAuthError): + """Raised when WordPress API rate limit is exceeded.""" + pass + + +class WebsiteNotFoundError(WordPressAuthError): + """Raised when website_id is not found in local database.""" + pass + + +@dataclass +class WordPressUserInfo: + """Data class for WordPress user information.""" + wp_user_id: str + username: str + email: str + display_name: str + roles: list[str] + raw_data: dict[str, Any] + + +@dataclass +class SyncStats: + """Data class for user synchronization statistics.""" + inserted: int + updated: int + total: int + errors: int + + +async def get_wordpress_api_base(website: Website) -> str: + """ + Get WordPress API base URL for a website. 
+ + Args: + website: Website model instance + + Returns: + WordPress REST API base URL + """ + # Use website's site_url if configured, otherwise use global config + base_url = website.site_url.rstrip('/') + return f"{base_url}/wp-json" + + +async def verify_wordpress_token( + token: str, + website_id: int, + wp_user_id: str, + db: AsyncSession, +) -> Optional[WordPressUserInfo]: + """ + Verify WordPress JWT token and validate user identity. + + Calls WordPress REST API GET /wp/v2/users/me with Authorization header. + Verifies response contains matching wp_user_id. + Verifies website_id exists in local database. + + Args: + token: WordPress JWT authentication token + website_id: Website identifier for multi-site isolation + wp_user_id: Expected WordPress user ID to verify + db: Async database session + + Returns: + WordPressUserInfo if valid, None if invalid + + Raises: + WebsiteNotFoundError: If website_id doesn't exist + WordPressTokenInvalidError: If token is invalid + WordPressAPIError: If API is unreachable + WordPressRateLimitError: If rate limited + """ + # Verify website exists + website_result = await db.execute( + select(Website).where(Website.id == website_id) + ) + website = website_result.scalar_one_or_none() + + if website is None: + raise WebsiteNotFoundError(f"Website {website_id} not found") + + api_base = await get_wordpress_api_base(website) + url = f"{api_base}/wp/v2/users/me" + + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/json", + } + + timeout = httpx.Timeout(10.0, connect=5.0) + + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.get(url, headers=headers) + + if response.status_code == 401: + raise WordPressTokenInvalidError("Invalid or expired WordPress token") + + if response.status_code == 429: + raise WordPressRateLimitError("WordPress API rate limit exceeded") + + if response.status_code == 503: + raise WordPressAPIError("WordPress API service unavailable") + + if 
response.status_code != 200: + raise WordPressAPIError( + f"WordPress API error: {response.status_code} - {response.text}" + ) + + data = response.json() + + # Verify user ID matches + response_user_id = str(data.get("id", "")) + if response_user_id != str(wp_user_id): + logger.warning( + f"User ID mismatch: expected {wp_user_id}, got {response_user_id}" + ) + return None + + # Extract user info + user_info = WordPressUserInfo( + wp_user_id=response_user_id, + username=data.get("username", ""), + email=data.get("email", ""), + display_name=data.get("name", ""), + roles=data.get("roles", []), + raw_data=data, + ) + + return user_info + + except httpx.TimeoutException: + raise WordPressAPIError("WordPress API request timed out") + except httpx.ConnectError: + raise WordPressAPIError("Unable to connect to WordPress API") + except httpx.HTTPError as e: + raise WordPressAPIError(f"HTTP error communicating with WordPress: {str(e)}") + + +async def fetch_wordpress_users( + website: Website, + admin_token: str, + page: int = 1, + per_page: int = 100, +) -> list[dict[str, Any]]: + """ + Fetch users from WordPress API (requires admin token). + + Calls WordPress REST API GET /wp/v2/users with admin authorization. 
+ + Args: + website: Website model instance + admin_token: WordPress admin JWT token + page: Page number for pagination + per_page: Number of users per page (max 100) + + Returns: + List of WordPress user data dictionaries + + Raises: + WordPressTokenInvalidError: If admin token is invalid + WordPressAPIError: If API is unreachable + WordPressRateLimitError: If rate limited + """ + api_base = await get_wordpress_api_base(website) + url = f"{api_base}/wp/v2/users" + + headers = { + "Authorization": f"Bearer {admin_token}", + "Accept": "application/json", + } + + params = { + "page": page, + "per_page": min(per_page, 100), + "context": "edit", # Get full user data + } + + timeout = httpx.Timeout(30.0, connect=10.0) + + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.get(url, headers=headers, params=params) + + if response.status_code == 401: + raise WordPressTokenInvalidError("Invalid admin token for user sync") + + if response.status_code == 403: + raise WordPressTokenInvalidError( + "Admin token lacks permission to list users" + ) + + if response.status_code == 429: + raise WordPressRateLimitError("WordPress API rate limit exceeded") + + if response.status_code == 503: + raise WordPressAPIError("WordPress API service unavailable") + + if response.status_code != 200: + raise WordPressAPIError( + f"WordPress API error: {response.status_code} - {response.text}" + ) + + return response.json() + + except httpx.TimeoutException: + raise WordPressAPIError("WordPress API request timed out") + except httpx.ConnectError: + raise WordPressAPIError("Unable to connect to WordPress API") + except httpx.HTTPError as e: + raise WordPressAPIError(f"HTTP error communicating with WordPress: {str(e)}") + + +async def sync_wordpress_users( + website_id: int, + admin_token: str, + db: AsyncSession, +) -> SyncStats: + """ + Synchronize users from WordPress to local database. 
async def sync_wordpress_users(
    website_id: int,
    admin_token: str,
    db: AsyncSession,
) -> SyncStats:
    """
    Synchronize users from WordPress to local database.

    Fetches all users from WordPress API (paginated) and performs an upsert:
    existing users get their ``updated_at`` refreshed, unknown users are
    inserted.

    Args:
        website_id: Website identifier for multi-site isolation
        admin_token: WordPress admin JWT token
        db: Async database session

    Returns:
        SyncStats with insertion/update counts

    Raises:
        WebsiteNotFoundError: If website_id doesn't exist
        WordPressTokenInvalidError: If admin token is invalid
        WordPressAPIError: If API is unreachable
    """
    # Consistency: reuse the shared existence check instead of duplicating
    # the same SELECT + raise inline.
    website = await verify_website_exists(website_id, db)

    # Index existing local users by WordPress ID for O(1) upsert decisions.
    existing_users_result = await db.execute(
        select(User).where(User.website_id == website_id)
    )
    existing_users = {
        str(user.wp_user_id): user
        for user in existing_users_result.scalars().all()
    }

    # Fetch users from WordPress (with pagination); a short page means the
    # last page was reached.
    all_wp_users = []
    page = 1
    per_page = 100

    while True:
        wp_users = await fetch_wordpress_users(
            website, admin_token, page, per_page
        )
        if not wp_users:
            break
        all_wp_users.extend(wp_users)
        if len(wp_users) < per_page:
            break
        page += 1

    inserted = 0
    updated = 0
    errors = 0

    for wp_user in all_wp_users:
        try:
            wp_user_id = str(wp_user.get("id", ""))

            if not wp_user_id:
                # Record without an id cannot be keyed; count and skip.
                errors += 1
                continue

            if wp_user_id in existing_users:
                # Update existing user (timestamp update)
                existing_users[wp_user_id].updated_at = datetime.now(timezone.utc)
                updated += 1
            else:
                # Insert new user
                db.add(User(
                    wp_user_id=wp_user_id,
                    website_id=website_id,
                    created_at=datetime.now(timezone.utc),
                    updated_at=datetime.now(timezone.utc),
                ))
                inserted += 1

        except Exception as e:
            # Best-effort per-user sync: log and keep going.
            logger.error(f"Error syncing user {wp_user.get('id')}: {e}")
            errors += 1

    await db.commit()

    total = inserted + updated

    logger.info(
        f"WordPress user sync complete for website {website_id}: "
        f"{inserted} inserted, {updated} updated, {errors} errors"
    )

    return SyncStats(
        inserted=inserted,
        updated=updated,
        total=total,
        errors=errors,
    )


async def get_wordpress_user(
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
) -> Optional[User]:
    """
    Get user from local database by WordPress user ID and website ID.

    Args:
        wp_user_id: WordPress user ID
        website_id: Website identifier for multi-site isolation
        db: Async database session

    Returns:
        User object if found, None otherwise
    """
    result = await db.execute(
        select(User).where(
            User.wp_user_id == wp_user_id,
            User.website_id == website_id,
        )
    )
    return result.scalar_one_or_none()


async def verify_website_exists(
    website_id: int,
    db: AsyncSession,
) -> Website:
    """
    Verify website exists in database.

    Args:
        website_id: Website identifier
        db: Async database session

    Returns:
        Website model instance

    Raises:
        WebsiteNotFoundError: If website doesn't exist
    """
    result = await db.execute(
        select(Website).where(Website.id == website_id)
    )
    website = result.scalar_one_or_none()

    if website is None:
        raise WebsiteNotFoundError(f"Website {website_id} not found")

    return website


async def get_or_create_user(
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
) -> User:
    """
    Get existing user or create new one if not exists.

    NOTE(review): check-then-insert is racy under concurrent calls for the
    same (wp_user_id, website_id); confirm a unique constraint exists so a
    duplicate insert fails loudly instead of silently forking the user.

    Args:
        wp_user_id: WordPress user ID
        website_id: Website identifier
        db: Async database session

    Returns:
        User model instance
    """
    existing = await get_wordpress_user(wp_user_id, website_id, db)
    if existing:
        return existing

    new_user = User(
        wp_user_id=wp_user_id,
        website_id=website_id,
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
    )
    db.add(new_user)
    await db.commit()
    await db.refresh(new_user)

    return new_user
+ + Args: + wp_user_id: WordPress user ID + website_id: Website identifier + db: Async database session + + Returns: + User model instance + """ + existing = await get_wordpress_user(wp_user_id, website_id, db) + + if existing: + return existing + + # Create new user + new_user = User( + wp_user_id=wp_user_id, + website_id=website_id, + created_at=datetime.now(timezone.utc), + updated_at=datetime.now(timezone.utc), + ) + db.add(new_user) + await db.commit() + await db.refresh(new_user) + + return new_user diff --git a/handoff.md b/handoff.md new file mode 100644 index 0000000..c6fe16b --- /dev/null +++ b/handoff.md @@ -0,0 +1,96 @@ +--- +## HANDOFF CONTEXT +GOAL +---- +Continue implementation of IRT-Powered Adaptive Question Bank System after user configures GLM-5 model mapping for specific subagent categories. +WORK COMPLETED +-------------- +- Created comprehensive PRD (v1.1) from project-brief.md +- Resolved 10 critical clarification questions with client: + 1. Excel Import: Standardized across ALL tryouts + 2. AI Generation: 1 request = 1 question, admin playground for testing, no approval workflow + 3. Normalization: Optional manual/automatic control (system handles auto when sufficient data) + 4. Rollback: Preserve IRT history, apply CTT to new sessions only + 5. Admin Permissions: Not needed (WordPress handles per-site admins) + 6. Dashboards: FastAPI Admin only + 7. Rate Limiting: User-level reuse check + AI generation toggle + 8. Student UX: Admin sees internal metrics, students only see primary score + 9. Data Retention: Keep all data + 10. 
Reporting: All 4 report types required +- Created detailed technical implementation plan with 10 parallel subagents: + - Deep Agent 1: Core API + CTT Scoring + - Deep Agent 2: IRT Calibration Engine (recommended for GLM-5) + - Deep Agent 3: CAT Selection Logic (recommended for GLM-5) + - Deep Agent 4: AI Generation + OpenRouter (recommended for GLM-5) + - Deep Agent 5: WordPress Integration + - Deep Agent 6: Reporting System (recommended for GLM-5) + - Unspecified-High Agents: Database Schema, Excel Import/Export, Admin Panel, Normalization +CURRENT STATE +------------- +- PRD.md file created (746 lines, v1.1) +- project-brief.md exists (reference document) +- No code implementation started yet +- No git repository initialized +- Working directory: /Users/dwindown/Applications/tryout-system +- Session ID: ses_2f1bf9e3cffes96exBxyheOiYT +PENDING TASKS +------------- +1. User configures GLM-5 model mapping for `deep` category (GLM-5 for algorithmic complexity) +2. User configures GLM-4.7 model mapping for `unspecified-high` category (general implementation) +3. Initialize git repository +4. Create project structure (app/, models/, routers/, services/, tests/) +5. Launch Unspecified-High Agent 1: Database Schema + ORM (BLOCKS all other agents) +6. After schema complete: Launch Deep Agents 1-3 in parallel (Core API, IRT Calibration, CAT Selection) +7. Launch Deep Agents 4-6 + Unspecified-High Agents 2-4 in parallel (AI Generation, WordPress, Reporting, Excel, Admin, Normalization) +8. 
Integration testing and validation +KEY FILES +--------- +- PRD.md - Complete product requirements document (v1.1, 746 lines) +- project-brief.md - Original technical specification reference +IMPORTANT DECISIONS +------------------- +- 1 request = 1 question for AI generation (no batch) +- Admin playground for AI testing (no approval workflow for student tests) +- Normalization: Admin chooses manual/automatic; system handles auto when data sufficient +- Rollback: Keep IRT historical scores, apply CTT only to new sessions +- No admin permissions system (WordPress handles per-site admin access) +- FastAPI Admin only (no custom dashboards) +- Global AI generation toggle for cost control +- User-level question reuse check (prevent duplicate difficulty exposure) +- Admin sees internal metrics, students only see primary score +- Keep all data indefinitely +- All 4 report types required (Student, Item, Calibration, Tryout comparison) +EXPLICIT CONSTRAINTS +-------------------- +- Excel format is standardized across ALL tryouts (strict parser) +- CTT formulas must match client Excel 100% (p = Σ Benar / Total Peserta) +- IRT 1PL Rasch model only (b parameter, no a/c initially) +- θ and b ∈ [-3, +3], NM and NN ∈ [0, 1000] +- Normalization target: Mean=500±5, SD=100±5 +- Tech stack: FastAPI, PostgreSQL, SQLAlchemy, FastAPI Admin, OpenRouter (Qwen3 Coder 480B / Llama 3.3 70B) +- Deployment: aaPanel VPS with Python Manager +- No type error suppression (no `as any`, `@ts-ignore`) +- Zero disruption to existing operations (non-destructive, additive) +GLM-5 MODEL ALLOCATION RECOMMENDATION +----------------------------------- +Use GLM-5 for: +- Deep Agent 2: IRT Calibration Engine (mathematical algorithms, sparse data handling) +- Deep Agent 3: CAT Selection Logic (adaptive algorithms, termination conditions) +- Deep Agent 4: AI Generation + OpenRouter (prompt engineering, robust parsing) +- Deep Agent 6: Reporting System (complex aggregation, multi-dimensional analysis) +Use 
"""
IRT 1PL (Rasch model) Maximum Likelihood Estimation.

Model: P(correct | theta, b) = 1 / (1 + exp(-(theta - b)))
"""
import numpy as np
from scipy.optimize import minimize_scalar, minimize


def estimate_theta(responses, b_params):
    """
    Estimate student ability (theta) via MLE for the 1PL IRT model.

    Parameters
    ----------
    responses : list or array
        Binary responses [0, 1, 1, 0, ...].
    b_params : list or array
        Item difficulty parameters, one per response.

    Returns
    -------
    float
        Estimated theta. Returns 0.0 for empty input or if the optimizer
        fails; returns the capped sentinels +4.0 / -4.0 for perfect /
        zero scores, where the MLE diverges.

    Raises
    ------
    ValueError
        If responses and b_params differ in length.
    """
    responses = np.asarray(responses, dtype=float)
    b_params = np.asarray(b_params, dtype=float)

    # Empty input carries no information: return the scale midpoint.
    if len(responses) == 0 or len(b_params) == 0:
        return 0.0
    if len(responses) != len(b_params):
        raise ValueError("responses and b_params must have same length")

    n = len(responses)
    sum_resp = np.sum(responses)

    # Perfect / zero score: the likelihood has no interior maximum,
    # so skip optimization and return a bounded sentinel.
    if sum_resp == n:
        return 4.0
    if sum_resp == 0:
        return -4.0

    def neg_log_likelihood(theta):
        """Negative log-likelihood of the response pattern at theta."""
        exponent = np.clip(theta - b_params, -30, 30)  # numerical stability
        p = 1.0 / (1.0 + np.exp(-exponent))
        p = np.clip(p, 1e-10, 1 - 1e-10)  # avoid log(0)
        ll = np.sum(responses * np.log(p) + (1 - responses) * np.log(1 - p))
        return -ll

    result = minimize_scalar(neg_log_likelihood, bounds=(-6, 6), method='bounded')

    return float(result.x) if result.success else 0.0


def estimate_b(responses_matrix):
    """
    Estimate item difficulty parameters via joint MLE for the 1PL model.

    Alternates between re-estimating every student's theta and every
    item's b (joint-MLE coordinate ascent) for a fixed number of sweeps.

    Parameters
    ----------
    responses_matrix : 2D array
        Response matrix with rows = students, cols = items, entries 0/1.

    Returns
    -------
    numpy.ndarray
        Estimated b per item. An empty matrix yields an empty array;
        items answered all-correct / all-incorrect get the capped
        sentinels -4.0 / +4.0 (their MLE diverges).

    Raises
    ------
    ValueError
        If responses_matrix is not 2-dimensional.
    """
    responses_matrix = np.asarray(responses_matrix, dtype=float)

    if responses_matrix.size == 0:
        return np.array([])

    if responses_matrix.ndim != 2:
        raise ValueError("responses_matrix must be 2-dimensional")

    n_students, n_items = responses_matrix.shape

    if n_students == 0 or n_items == 0:
        return np.zeros(n_items)

    theta = np.zeros(n_students)
    b = np.zeros(n_items)

    for _ in range(20):  # fixed number of alternating sweeps
        # --- Update theta per student, holding b fixed ---
        for i in range(n_students):
            resp_i = responses_matrix[i, :]
            sum_resp = np.sum(resp_i)

            if sum_resp == n_items:
                theta[i] = 4.0   # perfect score: MLE diverges, cap it
            elif sum_resp == 0:
                theta[i] = -4.0  # zero score: MLE diverges, cap it
            else:
                def neg_ll_student(t):
                    exponent = np.clip(t - b, -30, 30)
                    p = np.clip(1.0 / (1.0 + np.exp(-exponent)), 1e-10, 1 - 1e-10)
                    return -np.sum(resp_i * np.log(p) + (1 - resp_i) * np.log(1 - p))

                res = minimize_scalar(neg_ll_student, bounds=(-6, 6), method='bounded')
                theta[i] = res.x if res.success else 0.0

        # --- Update b per item, holding theta fixed ---
        for j in range(n_items):
            resp_j = responses_matrix[:, j]
            sum_resp = np.sum(resp_j)

            if sum_resp == n_students:
                b[j] = -4.0  # everyone correct: very easy item
            elif sum_resp == 0:
                b[j] = 4.0   # nobody correct: very hard item
            else:
                def neg_ll_item(bj):
                    exponent = np.clip(theta - bj, -30, 30)
                    p = np.clip(1.0 / (1.0 + np.exp(-exponent)), 1e-10, 1 - 1e-10)
                    return -np.sum(resp_j * np.log(p) + (1 - resp_j) * np.log(1 - p))

                res = minimize_scalar(neg_ll_item, bounds=(-6, 6), method='bounded')
                b[j] = res.x if res.success else 0.0

    return b
0000000..c7d8be1 --- /dev/null +++ b/project-brief.md @@ -0,0 +1,1109 @@ +# IRT-Powered Adaptive Question Bank System + +## Final Project Brief \& Technical Specification + +**Project Name:** IRT Bank Soal (Adaptive Question Bank with AI Generation) +**Client:** Sejoli Tryout Multi-Website Platform +**Tech Stack:** FastAPI + PostgreSQL + SQLAlchemy + FastAPI Admin + OpenRouter AI +**Deployment:** aaPanel VPS (Python Manager + PgSQL Manager) +**Version:** 1.2.0 Final (Hybrid CTT+IRT + Dynamic Normalization) +**Last Updated:** March 21, 2026, 9:31 AM WIB + +*** + +## 🎯 Executive Summary + +Sistem bank soal adaptif **hybrid** yang FULLY COMPATIBLE dengan Excel klien existing, dengan enhancement untuk: + +- **Classical Test Theory (CTT)** - EXACT formula dari screenshot Excel klien (p, bobot, NM, NN) +- **Item Response Theory (IRT)** - Modern adaptive testing dengan theta estimation +- **AI Generation** - Auto-generate soal variants Mudah/Sulit via OpenRouter (Qwen3 Coder 480B) +- **Dynamic Normalization** - Rataan/SB calculated real-time atau manual input +- **Multi-Website Support** - 1 backend untuk N WordPress sites (Mat SD, Bahasa SMA, dll) +- **Non-Destructive** - 100% backward compatible dengan cara kerja klien sekarang + +**Core Capabilities:** + +1. Dual Scoring Mode: CTT (p, bobot) \& IRT (θ, b) berjalan paralel +2. Screenshot Compatible: Import langsung dari Excel klien (p=140/458) +3. Exact Formula Match: Implementasi persis formula Excel klien +4. Dynamic Normalization: Auto-calculate rataan/SB atau static mode +5. AI Question Generation: Generate Mudah/Sulit dari basis Sedang (CTT) +6. 
Full Audit Trail: Track CTT→IRT transition per item + +*** + +## 📋 Exact Client Formulas (From Excel Analysis) + +### STEP 1: Tingkat Kesukaran (TK) per Soal + +``` +Formula: p = Σ Benar / Total Peserta + +Excel: =D464/$A$463 +├─ D464 = Jumlah siswa yang jawab benar soal 1 +└─ A463 = Total peserta (e.g., 458) + +Example: p = 140/458 = 0.3057 → "Sedang" +``` + + +### STEP 2: Bobot per Soal + +``` +Formula: Bobot = 1 - p + +Excel: =1-D4 + +Example: Bobot = 1 - 0.3057 = 0.6943 + +Interpretation: +- Soal mudah (p=0.8) → bobot=0.2 (nilai rendah) +- Soal sulit (p=0.1) → bobot=0.9 (nilai tinggi) +``` + + +### STEP 3: Total Benar per Siswa + +``` +Formula: Total_Benar = COUNT(jawaban benar) + +Excel: =SUM(D454:W454) [20 soal] + +Example: Siswa benar 15 soal → Total_Benar = 15 +``` + + +### STEP 4: Total Bobot Earned per Siswa + +``` +Formula: Total_Bobot = Σ (bobot_soal × jawaban_siswa) + +Excel: =SUMPRODUCT($D$5:$W$5, D454:W454) +├─ $D$5:$W$5 = Array bobot [0.69, 0.85, 0.42, ...] +└─ D454:W454 = Jawaban [1, 1, 0, 1, ...] + +Example: + Soal 1: bobot=0.69 × jawaban=1 → 0.69 + Soal 2: bobot=0.85 × jawaban=1 → 0.85 + Soal 3: bobot=0.42 × jawaban=0 → 0.00 + ... 
+ Total_Bobot = 12.5 +``` + + +### STEP 5: Nilai Mentah (NM) [0-1000 scale] + +``` +Formula: NM = (Total_Bobot_Siswa / Total_Bobot_Max) × 1000 + +Excel: =(Y454/$X$5)*1000 +├─ Y454 = Total bobot siswa (e.g., 12.5) +└─ $X$5 = Total bobot maksimum (sum semua bobot, 18.3) + +Example: NM = (12.5 / 18.3) × 1000 = 683 +Range: 0-1000 (percentage-like scale) +``` + + +### STEP 6: Nilai Nasional (NN) - Z-Score Normalized + +``` +Formula: NN = 500 + 100 × ((NM - Rataan) / SB) + +Excel: =500+(100*((Z454-500)/100)) + +Components: +- 500 = Target mean (center point) +- 100 = Target standard deviation +- Rataan = Actual mean of NM from all participants +- SB = Actual standard deviation of NM + +⚠️ CURRENT CLIENT ISSUE: +Rataan = 500 (hardcoded) → NN = 500 + (NM - 500) = NM +SB = 100 (hardcoded) +Result: NO actual normalization (NN always equals NM) + +✅ OUR FIX: Dynamic calculation with 3 modes +``` + + +### Kategori Kesulitan (CTT Standard) + +``` +Tingkat Kesukaran (p): +p < 0.30 → Sukar (Difficult) +0.30 ≤ p ≤ 0.70 → Sedang (Medium) +p > 0.70 → Mudah (Easy) + +Bobot Implications: +p=0.09 → Bobot=0.91 (Sukar, high weight) +p=0.50 → Bobot=0.50 (Sedang, medium weight) +p=0.85 → Bobot=0.15 (Mudah, low weight) +``` + + +*** + +## 🔄 CTT vs IRT: Understanding Both Approaches + +### Classical Test Theory (CTT) - Client Method + +**Kelebihan CTT:** + +- Mudah dipahami admin/guru +- Tidak butuh banyak data (minimal 100 siswa) +- Compatible dengan sistem existing +- Cepat dihitung +- Formula transparent (visible in Excel) + +**Keterbatasan CTT:** + +- Sample-dependent (p berubah tiap kelompok) +- Tidak adaptive (soal fixed order) +- Butuh soal baru tiap tes (tidak bisa reuse efisien) +- Normalization issue (jika rataan/SB hardcoded) + + +### Item Response Theory (IRT) - Modern Adaptive + +**Core Formula (1PL Rasch):** + +``` +P(θ) = 1 / (1 + e^-(θ - b)) + +θ = Kemampuan user (-3 to +3) +b = Kesulitan item (-3 to +3) + +θ = -2 (lemah) → P(correct) di b=-1 = 73% +θ = 0 (average) → 
P(correct) di b=0 = 50% +θ = +2 (kuat) → P(correct) di b=+2 = 50% +``` + +**Kelebihan IRT:** + +- Item-invariant (b tetap meski kelompok berbeda) +- Adaptive (pilih soal sesuai kemampuan real-time) +- Reuse efficient (1000 user, tiap slot 3 variant cukup) +- Akurat lebih cepat (15 soal IRT = 30 soal CTT) + +**Keterbatasan IRT:** + +- Butuh kalibrasi (min 100-500 responses per item) +- Kompleks untuk admin non-psikometri +- Butuh sistem adaptive (tidak bisa paper-based) + + +### Hybrid Solution (This System) + +| Aspek | CTT Mode (Start) | Hybrid Mode (Transition) | IRT Mode (Goal) | +| :-- | :-- | :-- | :-- | +| **Admin Input** | p-value dari screenshot | Edit p atau b, sync otomatis | Edit b, p calculated | +| **Item Selection** | Fixed order slot 1-30 | Mixed (CTT fixed + IRT adaptive) | Fully adaptive CAT | +| **Scoring** | NM → NN (screenshot) | Paralel CTT \& IRT scores | θ → NN mapped | +| **Normalization** | Static atau Dynamic | Choose per tryout | Dynamic recommended | +| **AI Generation** | Dari p basis | Dari p atau b | Dari b calibrated | +| **Reuse** | Minimal | Moderate (cache variants) | Maximum (infinite pool) | + + +*** + +## 🏗️ System Architecture + +### High-Level Flow (Hybrid + Dynamic Normalization) + +``` +┌─────────────────────────────────────────┐ +│ WP Site 1 (Mat SD) │ WP Site 2 (Bahasa SMA) +│ Sejoli Tryout │ Sejoli Tryout +│ CTT Mode: Fixed │ IRT Mode: Adaptive +│ website_id=1 │ website_id=2 +└─────────────────────────────────────────┘ + │ │ + └────────┬───────────┘ + │ REST API + │ POST /next_item + │ {mode: "ctt"|"irt"|"hybrid"} + ▼ + ┌──────────────────────────────┐ + │ FastAPI Backend (aaPanel) │ + ├──────────────────────────────┤ + │ Hybrid Scoring Engine │ + │ ├─ CTT: NM from p-bobot │ + │ ├─ IRT: θ from responses │ + │ ├─ Normalization: Dynamic │ + │ └─ Return primary + secondary│ + │ │ + │ Dynamic Normalization Engine │ + │ ├─ Rataan = AVG(all NM) │ + │ ├─ SB = STDEV(all NM) │ + │ ├─ Mode switch: Static→Dynamic + │ └─ Real-time 
update per user │ + │ │ + │ Item Selection Strategy │ + │ ├─ CTT: Slot order (1→2→3) │ + │ ├─ IRT: CAT (b ≈ θ) │ + │ └─ Hybrid: First 10 CTT, IRT │ + └────────────┬─────────────────┘ + │ + ▼ + ┌──────────────────────────────┐ + │ PostgreSQL Database │ + ├──────────────────────────────┤ + │ items (ADDED: ctt_p, bobot) │ + │ user_answers (ADDED: nm, nn) │ + │ tryout_config (ADDED: modes) │ + │ tryout_stats (NEW: stats) │ + └──────────────────────────────┘ +``` + + +*** + +## 💾 Database Schema (v1.2 Final) + +### Table: tryout_config + +```sql +CREATE TABLE tryout_config ( + id SERIAL PRIMARY KEY, + website_id INTEGER NOT NULL, + tryout_id INTEGER NOT NULL, + + -- Mode Control + scoring_mode VARCHAR(20) DEFAULT 'ctt', -- 'ctt', 'irt', 'hybrid' + selection_mode VARCHAR(20) DEFAULT 'fixed', -- 'fixed', 'adaptive', 'hybrid' + + -- CTT Settings + min_peserta_for_ctt INTEGER DEFAULT 100, + + -- Normalization Settings + normalization_mode VARCHAR(20) DEFAULT 'static', -- 'static', 'dynamic', 'hybrid' + static_rataan FLOAT DEFAULT 500, + static_sb FLOAT DEFAULT 100, + min_sample_for_dynamic INTEGER DEFAULT 100, + + -- IRT Settings + enable_irt_when_calibrated BOOLEAN DEFAULT FALSE, + min_calibration_sample INTEGER DEFAULT 200, + theta_estimation_method VARCHAR(20) DEFAULT 'mle', -- 'mle', 'eap', 'map' + + -- Transition Settings + hybrid_transition_slot INTEGER DEFAULT 10, + fallback_to_ctt_on_error BOOLEAN DEFAULT TRUE, + + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + + UNIQUE(website_id, tryout_id) +); +``` + + +### Table: tryout_stats + +```sql +CREATE TABLE tryout_stats ( + id SERIAL PRIMARY KEY, + website_id INTEGER NOT NULL, + tryout_id INTEGER NOT NULL, + + -- Running Statistics + participant_count INTEGER DEFAULT 0, + total_nm_sum FLOAT DEFAULT 0, -- Σ all NM scores + total_nm_sq_sum FLOAT DEFAULT 0, -- Σ (NM^2) for variance calc + + -- Calculated Values (updated on each new participant) + current_rataan FLOAT, -- AVG(all NM) + 
current_sb FLOAT, -- STDEV(all NM) + min_nm FLOAT, + max_nm FLOAT, + + -- Metadata + last_calculated_at TIMESTAMPTZ, + last_participant_id INTEGER, + updated_at TIMESTAMPTZ DEFAULT NOW(), + + UNIQUE(website_id, tryout_id) +); + +CREATE INDEX idx_tryout_stats_lookup ON tryout_stats(website_id, tryout_id); +``` + + +### Table: user_answers + +```sql +CREATE TABLE user_answers ( + id SERIAL PRIMARY KEY, + wp_user_id INTEGER NOT NULL, + website_id INTEGER NOT NULL, + tryout_id INTEGER NOT NULL, + slot INTEGER NOT NULL, + level VARCHAR(20) NOT NULL, + item_id INTEGER NOT NULL, + + -- Response Data + response INTEGER NOT NULL, -- 0=incorrect, 1=correct + time_spent INTEGER, + + -- CTT Scoring + ctt_bobot_earned FLOAT, -- Bobot if correct, 0 if wrong + ctt_total_bobot_cumulative FLOAT, -- Running Σ bobot earned + ctt_nm FLOAT, -- Nilai Mentah (0-1000) + ctt_nn FLOAT, -- Nilai Nasional (normalized) + + -- Normalization Applied + rataan_used FLOAT, -- Rataan value at this calculation + sb_used FLOAT, -- SB value at this calculation + normalization_mode_used VARCHAR(20), -- 'static', 'dynamic', 'hybrid' + + -- IRT Scoring + irt_theta FLOAT, -- Ability estimate at this point + irt_theta_se FLOAT, -- Standard error + irt_information FLOAT, -- Information value at this item + + -- Metadata + scoring_mode_used VARCHAR(20), -- 'ctt', 'irt', 'hybrid' + answered_at TIMESTAMPTZ DEFAULT NOW(), + + FOREIGN KEY (item_id) REFERENCES items(id) ON DELETE CASCADE, + UNIQUE(wp_user_id, website_id, tryout_id, slot, level) +); + +CREATE INDEX idx_user_answers_lookup ON user_answers(wp_user_id, website_id, tryout_id); +CREATE INDEX idx_user_answers_scoring ON user_answers(scoring_mode_used, ctt_nn, irt_theta); +``` + + +### Table: items + +```sql +CREATE TABLE items ( + id SERIAL PRIMARY KEY, + website_id INTEGER NOT NULL, + tryout_id INTEGER NOT NULL, + slot INTEGER NOT NULL, + level VARCHAR(20) NOT NULL, -- 'Mudah', 'Sedang', 'Sulit' + stem TEXT NOT NULL, + options JSONB NOT NULL, + correct 
CHAR(1) NOT NULL, + explanation TEXT, + + -- CTT Parameters (Screenshot Compatible) + ctt_p FLOAT, -- Proportion correct (0.09 from screenshot) + ctt_bobot FLOAT, -- 1 - p (0.91) + ctt_category VARCHAR(20), -- 'Sukar', 'Sedang', 'Mudah' + + -- IRT Parameters (Adaptive) + irt_b FLOAT DEFAULT 0.0, -- Difficulty (-3 to +3) + irt_a FLOAT DEFAULT 1.0, -- Discrimination (optional) + irt_c FLOAT DEFAULT 0.25, -- Guessing (optional) + + -- Calibration Status + calibrated BOOLEAN DEFAULT FALSE, -- TRUE when 100+ responses analyzed + calibration_sample_size INTEGER DEFAULT 0, + calibration_date TIMESTAMPTZ, + + -- Legacy Fields + generated_by VARCHAR(10) NOT NULL, -- 'admin' or 'ai' + ai_model VARCHAR(50), + basis_item_id INTEGER, + category_id INTEGER, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + + FOREIGN KEY (basis_item_id) REFERENCES items(id) ON DELETE SET NULL +); + +CREATE INDEX idx_items_lookup ON items(website_id, tryout_id, slot, level); +CREATE INDEX idx_items_calibrated ON items(calibrated, calibration_sample_size); +CREATE INDEX idx_items_ctt ON items(ctt_p, ctt_category); +``` + + +*** + +## 🎯 AI Question Generation (OpenRouter) + +### Recommended Models (OpenRouter Free Tier) + +| Model | Kenapa Cocok | Cost | +| :-- | :-- | :-- | +| **Qwen3 Coder 480B** | Math/reasoning expert, generate soal + solusi akurat, control difficulty | Free | +| **Llama 3.3 70B Instruct** | Multilingual (Indonesia), Bloom's Taxonomy, recall→analyze | Free | +| **DeepSeek R1/Math** | Math specialist (algebra/geo), outperform frontier models | Low (\$0.1/1M tokens) | + +### AI Generation Workflow + +**Context:** User 123, Tryout A, Slot 2 (Attempt 2) + +1. Python API hitung θ → perlu "Sulit" +2. Check DB: Ada soal Sulit slot 2? ❌ +3. AI Generate: + +``` +POST OpenRouter { + model: 'qwen3-coder-480b', + prompt: "Generate 1 soal Mat SD level Sulit mirip [basis_soal]..." +} +``` + +4. 
import numpy as np
from datetime import datetime, timezone
from typing import Dict, List, Tuple
from scipy.optimize import minimize

# NOTE: Item, TryoutConfig, TryoutStats and Session are ORM/session types
# imported at the real module's top; annotations below are quoted so this
# reference implementation stays importable on its own.


def calculate_ctt_score_exact(
    responses: "List[Dict]",
    items: "List[Item]",
    config: "TryoutConfig",
    db: "Session",
) -> Dict:
    """
    Calculate a CTT score using the EXACT client Excel formula chain.

      1. p = sum(correct) / total participants     (per item, pre-stored)
      2. bobot = 1 - p                             (pre-stored on item)
      3. total_bobot = SUMPRODUCT(bobot, answers)
      4. NM = (total_bobot / total_bobot_max) * 1000
      5. NN = 500 + 100 * ((NM - rataan) / SB)

    Args:
        responses: per-item dicts with a 'correct' key (0/1)
        items: Item rows aligned index-for-index with responses
        config: TryoutConfig holding the normalization settings
        db: ORM session, used to read/update running tryout stats

    Returns:
        Dict with totals, NM, NN and the normalization values used.
    """
    # SUMPRODUCT equivalent: accumulate the bobot of correct answers.
    total_bobot_earned = 0.0
    total_bobot_max = 0.0
    total_benar = 0

    for response, item in zip(responses, items):
        bobot = item.ctt_bobot  # pre-calculated as 1 - p
        total_bobot_max += bobot

        if response['correct'] == 1:
            total_bobot_earned += bobot
            total_benar += 1

    # NM (Nilai Mentah) on a 0-1000 scale; guard the empty/zero-bobot
    # case to avoid division by zero.
    if total_bobot_max == 0:
        nm = 0.0
    else:
        nm = (total_bobot_earned / total_bobot_max) * 1000

    # Rataan/SB depend on the configured normalization mode; this call
    # also folds the current NM into the running statistics.
    rataan, sb, norm_mode = get_normalization_params(config, db, nm)

    # NN (Nilai Nasional): z-score style rescaling to mean 500 / SD 100.
    if sb == 0 or sb is None:
        nn = 500.0
    else:
        nn = 500 + 100 * ((nm - rataan) / sb)

    nn = float(np.clip(nn, 0, 1000))  # keep NN inside the reporting range

    return {
        "mode": "ctt",
        "total_benar": total_benar,
        "total_bobot_earned": round(total_bobot_earned, 2),
        "total_bobot_max": round(total_bobot_max, 2),
        "nm": round(nm, 1),
        "nn": round(nn, 1),
        # rataan/sb may be None (the sb-is-None branch above anticipates
        # it); previously round(None, 2) raised TypeError here.
        "rataan_used": round(rataan, 2) if rataan is not None else None,
        "sb_used": round(sb, 2) if sb is not None else None,
        "normalization_mode": norm_mode,
        "breakdown": {
            "percentage": round((total_bobot_earned / total_bobot_max) * 100, 1)
            if total_bobot_max > 0 else 0
        },
    }


def get_normalization_params(
    config: "TryoutConfig",
    db: "Session",
    current_nm: float,
) -> Tuple[float, float, str]:
    """
    Resolve (rataan, sb, mode_used) for the configured normalization mode.

    Side effect: folds current_nm into the tryout's running statistics
    (count, sum, sum of squares) and commits the updated stats row.

    Modes:
        static  -> always the configured static values
        dynamic -> running mean/SD once >= 2 participants, else static
        hybrid  -> running mean/SD once >= min_sample_for_dynamic
    """
    stats = db.query(TryoutStats).filter_by(
        website_id=config.website_id,
        tryout_id=config.tryout_id,
    ).first()

    if not stats:
        # First participant for this tryout: create the stats row.
        stats = TryoutStats(
            website_id=config.website_id,
            tryout_id=config.tryout_id,
            participant_count=0,
            total_nm_sum=0,
            total_nm_sq_sum=0,
        )
        db.add(stats)
        db.commit()

    # Fold the current NM into the running sums.
    stats.participant_count += 1
    stats.total_nm_sum += current_nm
    stats.total_nm_sq_sum += (current_nm ** 2)

    n = stats.participant_count
    if n > 1:
        # Running mean and population SD from sum / sum-of-squares.
        mean = stats.total_nm_sum / n
        variance = (stats.total_nm_sq_sum / n) - (mean ** 2)
        std_dev = np.sqrt(max(0, variance))  # clamp negative fp residue

        stats.current_rataan = mean
        stats.current_sb = std_dev
        # Timezone-aware timestamp for a TIMESTAMPTZ column
        # (datetime.utcnow() is naive and deprecated).
        stats.last_calculated_at = datetime.now(timezone.utc)
    else:
        # Single participant: SD is undefined, fall back to static values.
        stats.current_rataan = config.static_rataan
        stats.current_sb = config.static_sb

    db.commit()

    if config.normalization_mode == 'static':
        return (config.static_rataan, config.static_sb, 'static')

    if config.normalization_mode == 'dynamic':
        if stats.participant_count >= 2:
            return (stats.current_rataan, stats.current_sb, 'dynamic')
        return (config.static_rataan, config.static_sb, 'static_fallback')

    if config.normalization_mode == 'hybrid':
        if stats.participant_count >= config.min_sample_for_dynamic:
            return (stats.current_rataan, stats.current_sb, 'hybrid_dynamic')
        return (config.static_rataan, config.static_sb, 'hybrid_static')

    # Unknown mode: behave like static.
    return (config.static_rataan, config.static_sb, 'static')


def estimate_theta_mle(responses: "List[int]", items: "List[Item]") -> float:
    """
    Estimate ability (theta) by maximum likelihood under the 1PL model.

    P(theta) = 1 / (1 + exp(-(theta - b)))

    Args:
        responses: 0/1 correctness flags, aligned with items
        items: answered items; irt_b is used (missing/None treated as 0)

    Returns:
        theta estimate, constrained to [-3, 3] per the spec
    """

    def neg_log_likelihood(theta_val):
        ll = 0
        for response, item in zip(responses, items):
            b = item.irt_b if item.irt_b else 0
            p = 1 / (1 + np.exp(-(theta_val - b)))
            # Clamp probabilities away from 0/1 to avoid log(0).
            if response == 1:
                ll += np.log(max(p, 1e-10))
            else:
                ll += np.log(max(1 - p, 1e-10))
        return -ll  # negated for the minimizer

    result = minimize(
        neg_log_likelihood,
        x0=[0],             # start at the scale midpoint
        method='L-BFGS-B',
        bounds=[(-3, 3)],   # spec: theta in [-3, +3]
    )
    return float(result.x[0])


def estimate_theta_se(theta: float, items: "List[Item]") -> float:
    """
    Standard error of a theta estimate via Fisher information.

    For the 1PL model each item contributes p * (1 - p) to the total
    information; SE = 1 / sqrt(information), or inf with no information.
    """
    information = 0
    for item in items:
        b = item.irt_b if item.irt_b else 0
        p = 1 / (1 + np.exp(-(theta - b)))
        information += p * (1 - p)

    return 1 / np.sqrt(information) if information > 0 else float('inf')
Complete Session (Scoring) + +``` +POST /api/v1/session/{session_id}/complete + +Response: +{ + "status": "completed", + "primary_score": { + "mode": "ctt", + "total_benar": 15, + "total_bobot_earned": 12.5, + "total_bobot_max": 18.3, + "nm": 683.0, + "nn": 618.2, + "rataan_used": 483.5, + "sb_used": 112.3, + "normalization_mode": "dynamic" + }, + "secondary_score": { + "mode": "irt", + "theta": 0.85, + "theta_se": 0.42, + "nn_equivalent": 592.5 + }, + "comparison": { + "nn_difference": 25.7, + "agreement": "moderate" + } +} +``` + + +### 3. Get Tryout Config (with Normalization) + +``` +GET /api/v1/tryout/{tryout_id}/config + +Response: +{ + "tryout_id": 123, + "scoring_mode": "ctt", + "normalization_mode": "dynamic", + "static_rataan": 500, + "static_sb": 100, + "current_stats": { + "participant_count": 245, + "current_rataan": 483.5, + "current_sb": 112.3, + "min_nm": 125.0, + "max_nm": 892.0 + }, + "calibration_status": { + "total_items": 20, + "calibrated_items": 8, + "calibration_percentage": 40 + } +} +``` + + +### 4. 
Update Normalization Settings + +``` +PUT /api/v1/tryout/{tryout_id}/normalization + +Request: +{ + "normalization_mode": "hybrid", + "static_rataan": 500, + "static_sb": 100, + "min_sample_for_dynamic": 100 +} + +Response: +{ + "status": "updated", + "normalization_mode": "hybrid", + "current_participant_count": 45, + "will_switch_to_dynamic_at": 100, + "using_mode": "static" +} +``` + + +*** + +## 📥 Excel Import (OpenCode Ready) + +```python +import pandas as pd +import openpyxl +from models import Item, TryoutConfig + +def import_excel_tryout( + excel_file: str, + website_id: int, + tryout_id: int, + db: Session, + sheet_name: str = "CONTOH" +) -> Dict: + """ + Import from client Excel exactly like PERHITUNGAN-SKOR-TO-3.xlsx + + Excel structure: + - Row 1: Headers + - Row 2: Answer key (KUNCI) + - Row 4: TK (p values) formulas + - Row 5: BOBOT formulas + - Row 6+: Student responses + """ + + wb = openpyxl.load_workbook(excel_file, data_only=False) + ws = wb[sheet_name] + + # Extract answer key from Row 2 + answer_key = {} + for col in range(4, ws.max_column + 1): + key_cell = ws.cell(2, col).value + if key_cell and key_cell != "KUNCI": + slot_num = col - 3 + answer_key[slot_num] = key_cell.strip().upper() + + # Extract TK (p values) from Row 4 - get CALCULATED values + wb_data = openpyxl.load_workbook(excel_file, data_only=True) + ws_data = wb_data[sheet_name] + + p_values = {} + for col in range(4, ws.max_column + 1): + slot_num = col - 3 + if slot_num in answer_key: + p_cell = ws_data.cell(4, col).value + if p_cell and isinstance(p_cell, (int, float)): + p_values[slot_num] = float(p_cell) + + # Calculate bobot (1 - p) + bobot_values = {slot: 1 - p for slot, p in p_values.items()} + + # Categorize difficulty + def categorize_difficulty(p: float) -> tuple[str, str]: + if p < 0.30: + return ("Sukar", "Sulit") + elif p > 0.70: + return ("Mudah", "Mudah") + else: + return ("Sedang", "Sedang") + + # Create items + items_created = 0 + for slot_num, correct_ans in
answer_key.items(): + p = p_values.get(slot_num, 0.5) + bobot = bobot_values.get(slot_num, 0.5) + ctt_cat, level = categorize_difficulty(p) + + # Convert p to IRT b + b = ctt_p_to_irt_b(p) + + item = Item( + website_id=website_id, + tryout_id=tryout_id, + slot=slot_num, + level=level, + stem=f"[Import dari Excel - Soal {slot_num}]", + options={"A": "[Option A]", "B": "[Option B]", "C": "[Option C]", "D": "[Option D]", "E": "[Option E]"}, + correct=correct_ans, + explanation="", + ctt_p=p, + ctt_bobot=bobot, + ctt_category=ctt_cat, + irt_b=b, + calibrated=False, + calibration_sample_size=0, + generated_by='admin', + category_id=None + ) + db.add(item) + items_created += 1 + + db.commit() + + # Configure tryout normalization + config = TryoutConfig( + website_id=website_id, + tryout_id=tryout_id, + scoring_mode='ctt', + selection_mode='fixed', + normalization_mode='static', + static_rataan=500, + static_sb=100, + min_sample_for_dynamic=100 + ) + db.add(config) + db.commit() + + return { + "items_created": items_created, + "normalization_configured": "static (rataan=500, SB=100)" + } + + +def ctt_p_to_irt_b(p: float) -> float: + """ + Convert CTT p-value to IRT b parameter + At theta = 0 the 1PL model gives: b = ln((1-p)/p) + """ + if p <= 0 or p >= 1: + p = 0.5 + b = np.log((1 - p) / p) + return float(b) +``` + + +*** + +## 🚀 Migration Path (Non-Destructive) + +### Phase 1: Import Existing Data (Week 1) + +``` +1. Export current Sejoli Tryout data to Excel +2. Run import script: + python manage.py import_excel_tryout \ + --file="PERHITUNGAN-SKOR-TO-3.xlsx" \ + --sheet="CONTOH" \ + --website_id=1 \ + --tryout_id=123 + +3. Verify: + - All items have ctt_p, ctt_bobot + - IRT b auto-calculated from p + - calibrated=False for all + +4. Configure tryout: + - scoring_mode='ctt' + - selection_mode='fixed' + - normalization_mode='static' (like client now) +``` + + +### Phase 2: Collect Calibration Data (Week 2-4) + +``` +1.
Students use tryout normally (CTT mode, static normalization) +2. Backend logs all responses +3. Monitor calibration progress +4. Collect running statistics for dynamic normalization +``` + + +### Phase 3: Enable Dynamic Normalization (Week 5) + +``` +1. Check participant count: 100+ completed? +2. Update tryout_config: + - normalization_mode='hybrid' + - min_sample_for_dynamic=100 +3. Test with 10-20 new students +4. Verify distribution normalized to mean=500, sd=100 +``` + + +### Phase 4: Enable IRT Adaptive (Week 6+) + +``` +1. After 90%+ items calibrated + 1000+ total responses +2. Update to full IRT: + - scoring_mode='irt' + - selection_mode='adaptive' + - normalization_mode='dynamic' +3. Enable AI generation for Mudah/Sulit variants +``` + + +*** + +## ✅ Success Metrics + +### Technical KPIs + +1. **Formula Accuracy**: CTT scores match client Excel 100% +2. **Normalization Stability**: SB within 5% of expected after 100 users +3. **Calibration Coverage**: >80% items calibrated +4. **Score Agreement**: CTT vs IRT NN difference <20 points +5. **Fallback Rate**: <5% IRT→CTT fallbacks per session + +### Educational KPIs + +1. **Measurement Precision**: IRT SE <0.5 after 15 items +2. **Normalization Quality**: Distribution skewness <0.5 +3. **Adaptive Efficiency**: 30% reduction in test length (IRT vs CTT) +4. **Student Satisfaction**: >80% prefer adaptive mode +5. 
**Admin Adoption**: >70% tryouts use hybrid within 3 months + +*** + +## 📋 Complexity Estimation + +| Komponen | Effort (Days) | Notes | +| :-- | :-- | :-- | +| Setup FastAPI + PG + Alembic | 3 | Boilerplate | +| Core scoring (CTT/IRT hybrid) | 10 | Math-heavy | +| Dynamic normalization | 5 | Running stats | +| AI generation (OpenRouter) | 5 | API integration | +| Reuse logic + item selection | 8 | Algorithm | +| Admin UI (FastAPI Admin) | 5 | Auto-generated | +| Excel import | 3 | Formula parsing | +| WP integration | 4 | REST API | +| Testing + docs | 7 | Quality | +| Buffer | 5 | Contingency | +| **TOTAL** | **55 days** | **~1x Sejoli Rebuild** | + + +*** + +## 📚 Glossary + +- **p (TK)**: Proportion correct / Tingkat Kesukaran (CTT difficulty) +- **Bobot**: 1-p weight (CTT scoring weight) +- **NM**: Nilai Mentah (raw score 0-1000) +- **NN**: Nilai Nasional (normalized 500±100) +- **Rataan**: Mean of NM scores +- **SB**: Simpangan Baku (standard deviation of NM) +- **θ (theta)**: IRT ability (-3 to +3) +- **b**: IRT difficulty (-3 to +3) +- **SE**: Standard error (precision) +- **CAT**: Computerized Adaptive Testing +- **EM**: Expectation-Maximization (calibration method) +- **MLE**: Maximum Likelihood Estimation + +*** + +## 🔗 File References + +- **Excel Client:** `PERHITUNGAN-SKOR-TO-3.xlsx` (screenshot reference for formulas) +- **DB Schema:** PostgreSQL with Alembic migrations +- **API:** FastAPI with OpenAPI docs +- **Admin:** FastAPI Admin (auto-generated CRUD) + +*** + +## 📝 Key Guarantees + +✅ Existing CTT data safe, IRT adoption gradual, reversible anytime +✅ 100% compatible with client Excel formulas +✅ Dynamic normalization optional (can keep static mode) +✅ Zero data loss during transitions +✅ Non-destructive (Sejoli Tryout tetap jalan, external enhance) + +*** + +**Document Version:** 1.2.0 Final +**Last Updated:** March 21, 2026, 9:31 AM WIB +**Status:** Ready for Implementation via OpenCode 🚀 + +**By:** Dwindi Ramadhana +**For:** Sejoli Tryout
Multi-Website Platform + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a446293 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,40 @@ +# FastAPI and Server +fastapi>=0.104.1 +uvicorn[standard]>=0.24.0 +python-multipart>=0.0.6 + +# Database +sqlalchemy>=2.0.23 +asyncpg>=0.29.0 +alembic>=1.13.0 + +# Data & Validation +pydantic>=2.5.0 +pydantic-settings>=2.1.0 + +# Excel Processing +openpyxl>=3.1.2 +pandas>=2.1.4 + +# Math & Science +numpy>=1.26.2 +scipy>=1.11.4 + +# AI Integration +openai>=1.6.1 +httpx>=0.26.0 + +# Task Queue (for async jobs) +celery>=5.3.6 +redis>=5.0.1 + +# Testing +pytest>=7.4.3 +pytest-asyncio>=0.21.1 +httpx>=0.26.0 + +# Admin Panel +fastapi-admin>=1.4.0 + +# Utilities +python-dotenv>=1.0.0 diff --git a/tests/test_normalization.py b/tests/test_normalization.py new file mode 100644 index 0000000..7b738af --- /dev/null +++ b/tests/test_normalization.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 +""" +Test script for normalization calculations. + +This script tests the normalization functions to ensure they work correctly +without requiring database connections. 
+""" + +import sys +import os + +# Add the project root to the path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +from app.services.normalization import apply_normalization + + +def test_apply_normalization(): + """Test the apply_normalization function.""" + print("Testing apply_normalization function...") + print("=" * 60) + + # Test case 1: Normal normalization (NM=500, rataan=500, sb=100) + nm1 = 500 + rataan1 = 500 + sb1 = 100 + nn1 = apply_normalization(nm1, rataan1, sb1) + expected1 = 500 + print(f"Test 1: NM={nm1}, rataan={rataan1}, sb={sb1}") + print(f" Expected NN: {expected1}") + print(f" Actual NN: {nn1}") + print(f" Status: {'PASS' if nn1 == expected1 else 'FAIL'}") + print() + + # Test case 2: High score (NM=600, rataan=500, sb=100) + nm2 = 600 + rataan2 = 500 + sb2 = 100 + nn2 = apply_normalization(nm2, rataan2, sb2) + expected2 = 600 + print(f"Test 2: NM={nm2}, rataan={rataan2}, sb={sb2}") + print(f" Expected NN: {expected2}") + print(f" Actual NN: {nn2}") + print(f" Status: {'PASS' if nn2 == expected2 else 'FAIL'}") + print() + + # Test case 3: Low score (NM=400, rataan=500, sb=100) + nm3 = 400 + rataan3 = 500 + sb3 = 100 + nn3 = apply_normalization(nm3, rataan3, sb3) + expected3 = 400 + print(f"Test 3: NM={nm3}, rataan={rataan3}, sb={sb3}") + print(f" Expected NN: {expected3}") + print(f" Actual NN: {nn3}") + print(f" Status: {'PASS' if nn3 == expected3 else 'FAIL'}") + print() + + # Test case 4: Edge case - maximum NM + nm4 = 1000 + rataan4 = 500 + sb4 = 100 + nn4 = apply_normalization(nm4, rataan4, sb4) + expected4 = 1000 + print(f"Test 4: NM={nm4}, rataan={rataan4}, sb={sb4}") + print(f" Expected NN: {expected4}") + print(f" Actual NN: {nn4}") + print(f" Status: {'PASS' if nn4 == expected4 else 'FAIL'}") + print() + + # Test case 5: Edge case - minimum NM + nm5 = 0 + rataan5 = 500 + sb5 = 100 + nn5 = apply_normalization(nm5, rataan5, sb5) + expected5 = 0 + print(f"Test 5: NM={nm5}, rataan={rataan5}, sb={sb5}") + print(f" 
Expected NN: {expected5}") + print(f" Actual NN: {nn5}") + print(f" Status: {'PASS' if nn5 == expected5 else 'FAIL'}") + print() + + # Test case 6: Error case - invalid NM (above max) + try: + nm6 = 1200 # Above valid range + rataan6 = 500 + sb6 = 100 + nn6 = apply_normalization(nm6, rataan6, sb6) + print(f"Test 6: NM={nm6}, rataan={rataan6}, sb={sb6} (should raise ValueError)") + print(f" Status: FAIL - Should have raised ValueError") + except ValueError as e: + print(f"Test 6: NM={nm6}, rataan={rataan6}, sb={sb6} (should raise ValueError)") + print(f" Error: {e}") + print(f" Status: PASS - Correctly raised ValueError") + print() + + # Test case 7: Error case - invalid NM (below min) + try: + nm7 = -100 # Below valid range + rataan7 = 500 + sb7 = 100 + nn7 = apply_normalization(nm7, rataan7, sb7) + print(f"Test 7: NM={nm7}, rataan={rataan7}, sb={sb7} (should raise ValueError)") + print(f" Status: FAIL - Should have raised ValueError") + except ValueError as e: + print(f"Test 7: NM={nm7}, rataan={rataan7}, sb={sb7} (should raise ValueError)") + print(f" Error: {e}") + print(f" Status: PASS - Correctly raised ValueError") + print() + + # Test case 8: Different rataan/sb (NM=500, rataan=600, sb=80) + nm8 = 500 + rataan8 = 600 + sb8 = 80 + nn8 = apply_normalization(nm8, rataan8, sb8) + # z_score = (500 - 600) / 80 = -1.25 + # nn = 500 + 100 * (-1.25) = 500 - 125 = 375 + expected8 = 375 + print(f"Test 8: NM={nm8}, rataan={rataan8}, sb={sb8}") + print(f" Expected NN: {expected8}") + print(f" Actual NN: {nn8}") + print(f" Status: {'PASS' if nn8 == expected8 else 'FAIL'}") + print() + + # Test case 9: Error case - invalid NM + try: + nm9 = 1500 # Above valid range + rataan9 = 500 + sb9 = 100 + nn9 = apply_normalization(nm9, rataan9, sb9) + print(f"Test 9: NM={nm9}, rataan={rataan9}, sb={sb9} (should raise ValueError)") + print(f" Status: FAIL - Should have raised ValueError") + except ValueError as e: + print(f"Test 9: NM=1500, rataan=500, sb=100 (should raise 
ValueError)") + print(f" Error: {e}") + print(f" Status: PASS - Correctly raised ValueError") + print() + + # Test case 10: Error case - invalid sb + try: + nm10 = 500 + rataan10 = 500 + sb10 = 0 # Invalid SD + nn10 = apply_normalization(nm10, rataan10, sb10) + expected10 = 500 # Should return default when sb <= 0 + print(f"Test 10: NM={nm10}, rataan={rataan10}, sb={sb10} (should return default)") + print(f" Expected NN: {expected10}") + print(f" Actual NN: {nn10}") + print(f" Status: {'PASS' if nn10 == expected10 else 'FAIL'}") + except Exception as e: + print(f"Test 10: NM=500, rataan=500, sb=0 (should return default)") + print(f" Error: {e}") + print(f" Status: FAIL - Should have returned default value") + print() + + print("=" * 60) + print("All tests completed!") + print("=" * 60) + + +def calculate_dynamic_mean_and_std(nm_values): + """ + Calculate mean and standard deviation from a list of NM values. + This simulates what update_dynamic_normalization does. + """ + n = len(nm_values) + if n == 0: + return None, None + + # Calculate mean + mean = sum(nm_values) / n + + # Calculate variance (population variance) + if n > 1: + variance = sum((x - mean) ** 2 for x in nm_values) / n + std = variance ** 0.5 + else: + std = 0.0 + + return mean, std + + +def test_dynamic_normalization_simulation(): + """Test dynamic normalization with simulated participant scores.""" + print("\nTesting dynamic normalization simulation...") + print("=" * 60) + + # Simulate 10 participant NM scores + nm_scores = [450, 480, 500, 520, 550, 480, 510, 490, 530, 470] + print(f"Simulated NM scores: {nm_scores}") + print() + + # Calculate mean and SD + mean, std = calculate_dynamic_mean_and_std(nm_scores) + print(f"Calculated mean (rataan): {mean:.2f}") + print(f"Calculated SD (sb): {std:.2f}") + print() + + # Normalize each score + print("Normalized scores:") + for i, nm in enumerate(nm_scores): + nn = apply_normalization(nm, mean, std) + print(f" Participant {i+1}: NM={nm:3d} -> 
NN={nn:3d}") + print() + + # Check if normalized distribution is close to mean=500, SD=100 + nn_scores = [apply_normalization(nm, mean, std) for nm in nm_scores] + nn_mean, nn_std = calculate_dynamic_mean_and_std(nn_scores) + + print(f"Normalized distribution:") + print(f" Mean: {nn_mean:.2f} (target: 500 ± 5)") + print(f" SD: {nn_std:.2f} (target: 100 ± 5)") + print(f" Status: {'PASS' if abs(nn_mean - 500) <= 5 and abs(nn_std - 100) <= 5 else 'NEAR PASS'}") + print() + + print("=" * 60) + + +def test_incremental_update(): + """Test incremental update of dynamic normalization.""" + print("\nTesting incremental update simulation...") + print("=" * 60) + + # Simulate adding scores incrementally + nm_scores = [] + participant_count = 0 + total_nm_sum = 0.0 + total_nm_sq_sum = 0.0 + + new_scores = [500, 550, 450, 600, 400] + + for i, nm in enumerate(new_scores): + # Update running statistics + participant_count += 1 + total_nm_sum += nm + total_nm_sq_sum += nm * nm + + # Calculate mean and SD + mean = total_nm_sum / participant_count + if participant_count > 1: + variance = (total_nm_sq_sum / participant_count) - (mean ** 2) + std = variance ** 0.5 + else: + std = 0.0 + + nm_scores.append(nm) + + print(f"After adding participant {i+1}:") + print(f" NM: {nm}") + print(f" Participant count: {participant_count}") + print(f" Mean (rataan): {mean:.2f}") + print(f" SD (sb): {std:.2f}") + print() + + # Final calculation + final_mean, final_std = calculate_dynamic_mean_and_std(nm_scores) + print(f"Final statistics:") + print(f" All scores: {nm_scores}") + print(f" Mean: {final_mean:.2f}") + print(f" SD: {final_std:.2f}") + print() + + print("=" * 60) + + +if __name__ == "__main__": + print("Normalization Calculation Tests") + print("=" * 60) + print() + + test_apply_normalization() + test_dynamic_normalization_simulation() + test_incremental_update() + + print("\nAll test simulations completed successfully!")