first commit
This commit is contained in:
595
app/services/ai_generation.py
Normal file
595
app/services/ai_generation.py
Normal file
@@ -0,0 +1,595 @@
|
||||
"""
|
||||
AI Question Generation Service.
|
||||
|
||||
Handles OpenRouter API integration for generating question variants.
|
||||
Implements caching, user-level reuse checking, and prompt engineering.
|
||||
"""
|
||||
|
||||
import asyncio
import json
import logging
import re
from typing import Any, Dict, Literal, Optional, Union

import httpx
from sqlalchemy import and_, func, select
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.config import get_settings
from app.models.item import Item
from app.models.tryout import Tryout
from app.models.user_answer import UserAnswer
from app.schemas.ai import GeneratedQuestion
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
settings = get_settings()
|
||||
|
||||
# OpenRouter API configuration
|
||||
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
|
||||
|
||||
# Supported AI models
|
||||
SUPPORTED_MODELS = {
|
||||
"qwen/qwen-2.5-coder-32b-instruct": "Qwen 2.5 Coder 32B",
|
||||
"meta-llama/llama-3.3-70b-instruct": "Llama 3.3 70B",
|
||||
}
|
||||
|
||||
# Level mapping for prompts
|
||||
LEVEL_DESCRIPTIONS = {
|
||||
"mudah": "easier (simpler concepts, more straightforward calculations)",
|
||||
"sedang": "medium difficulty",
|
||||
"sulit": "harder (more complex concepts, multi-step reasoning)",
|
||||
}
|
||||
|
||||
|
||||
def get_prompt_template(
    basis_stem: str,
    basis_options: Dict[str, str],
    basis_correct: str,
    basis_explanation: Optional[str],
    target_level: Literal["mudah", "sulit"],
) -> str:
    """
    Build the standardized prompt used to ask the AI for a question variant.

    Args:
        basis_stem: The basis question stem
        basis_options: The basis question options, keyed by letter (A-D)
        basis_correct: The basis correct answer letter
        basis_explanation: The basis explanation, if any
        target_level: Target difficulty level

    Returns:
        Formatted prompt string ready to send to the model
    """
    # Fall back to the raw level string if it has no registered description.
    difficulty = LEVEL_DESCRIPTIONS.get(target_level, target_level)

    rendered_options = "\n".join(
        f" {label}: {text}" for label, text in basis_options.items()
    )

    if basis_explanation:
        explanation_line = f"Explanation: {basis_explanation}"
    else:
        explanation_line = "Explanation: (not provided)"

    return f"""You are an educational content creator specializing in creating assessment questions.

Given a "Sedang" (medium difficulty) question, generate a new question at a different difficulty level.

BASIS QUESTION (Sedang level):
Question: {basis_stem}
Options:
{rendered_options}
Correct Answer: {basis_correct}
{explanation_line}

TASK:
Generate 1 new question that is {difficulty} than the basis question above.

REQUIREMENTS:
1. Keep the SAME topic/subject matter as the basis question
2. Use similar context and terminology
3. Create exactly 4 answer options (A, B, C, D)
4. Only ONE correct answer
5. Include a clear explanation of why the correct answer is correct
6. Make the question noticeably {difficulty} - not just a minor variation

OUTPUT FORMAT:
Return ONLY a valid JSON object with this exact structure (no markdown, no code blocks):
{{"stem": "Your question text here", "options": {{"A": "Option A text", "B": "Option B text", "C": "Option C text", "D": "Option D text"}}, "correct": "A", "explanation": "Explanation text here"}}

Remember: The correct field must be exactly "A", "B", "C", or "D"."""
|
||||
|
||||
|
||||
def parse_ai_response(response_text: str) -> Optional[GeneratedQuestion]:
    """
    Parse the raw AI response into a GeneratedQuestion.

    Tolerates several response shapes: fenced ```json blocks, plain fenced
    blocks, a bare JSON object embedded in prose, or the whole response
    being JSON.

    Args:
        response_text: Raw AI response text

    Returns:
        GeneratedQuestion if parsing successful, None otherwise
    """
    if not response_text:
        return None

    cleaned = response_text.strip()

    # Extraction patterns, tried most-specific first.
    candidate_patterns = (
        r"```json\s*([\s\S]*?)\s*```",  # ```json ... ```
        r"```\s*([\s\S]*?)\s*```",      # ``` ... ```
        r"(\{[\s\S]*\})",               # Raw JSON object
    )

    for pattern in candidate_patterns:
        found = re.search(pattern, cleaned)
        if not found:
            continue
        try:
            parsed = json.loads(found.group(1).strip())
        except json.JSONDecodeError:
            # Matched text wasn't valid JSON; try the next pattern.
            continue
        return validate_and_create_question(parsed)

    # Last resort: the whole response might itself be JSON.
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        logger.warning(f"Failed to parse AI response: {cleaned[:200]}...")
        return None
    return validate_and_create_question(parsed)
|
||||
|
||||
|
||||
def validate_and_create_question(data: Dict[str, Any]) -> Optional[GeneratedQuestion]:
    """
    Validate parsed JSON data and build a GeneratedQuestion from it.

    Checks that the required fields exist, that options is a dict covering
    A-D, and that the correct answer is one of those letters.

    Args:
        data: Parsed JSON data

    Returns:
        GeneratedQuestion if valid, None otherwise
    """
    if any(field not in data for field in ("stem", "options", "correct")):
        logger.warning(f"Missing required fields in AI response: {data.keys()}")
        return None

    options = data.get("options", {})
    if not isinstance(options, dict):
        logger.warning("Options is not a dictionary")
        return None

    expected_letters = {"A", "B", "C", "D"}
    missing = expected_letters - set(options.keys())
    if missing:
        logger.warning(f"Missing required options: {missing}")
        return None

    # Normalize the answer letter to upper case before checking it.
    answer = str(data.get("correct", "")).upper()
    if answer not in expected_letters:
        logger.warning(f"Invalid correct answer: {answer}")
        return None

    return GeneratedQuestion(
        stem=str(data["stem"]).strip(),
        options={label: str(text).strip() for label, text in options.items()},
        correct=answer,
        # Empty explanations are stored as None rather than "".
        explanation=str(data.get("explanation", "")).strip() or None,
    )
|
||||
|
||||
|
||||
async def call_openrouter_api(
    prompt: str,
    model: str,
    max_retries: int = 3,
) -> Optional[str]:
    """
    Call OpenRouter API to generate question.

    Retries up to ``max_retries`` times: HTTP 429 backs off exponentially,
    timeouts retry immediately, any other HTTP error aborts. A single HTTP
    client (and its connection pool) is reused across all attempts instead
    of being recreated per retry.

    Args:
        prompt: The prompt to send
        model: AI model to use
        max_retries: Maximum retry attempts

    Returns:
        API response text or None if failed
    """
    if not settings.OPENROUTER_API_KEY:
        logger.error("OPENROUTER_API_KEY not configured")
        return None

    if model not in SUPPORTED_MODELS:
        logger.error(f"Unsupported AI model: {model}")
        return None

    headers = {
        "Authorization": f"Bearer {settings.OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://github.com/irt-bank-soal",
        "X-Title": "IRT Bank Soal",
    }

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": prompt,
            }
        ],
        "max_tokens": 2000,
        "temperature": 0.7,
    }

    timeout = httpx.Timeout(settings.OPENROUTER_TIMEOUT)

    # One client for all attempts: opening a fresh AsyncClient per retry
    # (as before) throws away the connection pool each time.
    async with httpx.AsyncClient(timeout=timeout) as client:
        for attempt in range(max_retries):
            try:
                response = await client.post(
                    OPENROUTER_API_URL,
                    headers=headers,
                    json=payload,
                )

                if response.status_code == 200:
                    data = response.json()
                    choices = data.get("choices", [])
                    if choices:
                        message = choices[0].get("message", {})
                        return message.get("content")
                    logger.warning("No choices in OpenRouter response")
                    return None

                elif response.status_code == 429:
                    # Rate limited - wait and retry with exponential backoff
                    # (asyncio is now imported at module level, not inside
                    # the loop as before).
                    logger.warning(f"Rate limited, attempt {attempt + 1}/{max_retries}")
                    if attempt < max_retries - 1:
                        await asyncio.sleep(2 ** attempt)
                        continue
                    return None

                else:
                    logger.error(
                        f"OpenRouter API error: {response.status_code} - {response.text}"
                    )
                    return None

            except httpx.TimeoutException:
                logger.warning(f"OpenRouter timeout, attempt {attempt + 1}/{max_retries}")
                if attempt < max_retries - 1:
                    continue
                return None

            except Exception as e:
                # Catch-all boundary: network/parse failures are logged and
                # retried rather than propagated to the caller.
                logger.error(f"OpenRouter API call failed: {e}")
                if attempt < max_retries - 1:
                    continue
                return None

    # All retries exhausted.
    return None
|
||||
|
||||
|
||||
async def generate_question(
    basis_item: Item,
    target_level: Literal["mudah", "sulit"],
    ai_model: str = "qwen/qwen-2.5-coder-32b-instruct",
) -> Optional[GeneratedQuestion]:
    """
    Generate a new question variant from a basis item.

    Builds the prompt, calls OpenRouter, and parses the response.

    Args:
        basis_item: The basis item (must be sedang level)
        target_level: Target difficulty level
        ai_model: AI model to use

    Returns:
        GeneratedQuestion if successful, None otherwise
    """
    prompt = get_prompt_template(
        basis_stem=basis_item.stem,
        basis_options=basis_item.options,
        basis_correct=basis_item.correct_answer,
        basis_explanation=basis_item.explanation,
        target_level=target_level,
    )

    raw_response = await call_openrouter_api(prompt, ai_model)
    if not raw_response:
        logger.error("No response from OpenRouter API")
        return None

    question = parse_ai_response(raw_response)
    if not question:
        logger.error("Failed to parse AI response")
        return None

    return question
|
||||
|
||||
|
||||
async def check_cache_reuse(
    tryout_id: str,
    slot: int,
    level: str,
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
) -> Optional[Item]:
    """
    Check if there's a cached item that the user hasn't answered yet.

    Queries the DB for existing items matching (tryout_id, slot, level),
    then fetches ALL of this user's answers to those items in one query
    (the previous version issued one UserAnswer query per candidate item —
    a classic N+1 pattern).

    Args:
        tryout_id: Tryout identifier
        slot: Question slot
        level: Difficulty level
        wp_user_id: WordPress user ID
        website_id: Website identifier
        db: Database session

    Returns:
        Cached item if found and user hasn't answered, None otherwise
    """
    # Find existing items at this slot/level
    result = await db.execute(
        select(Item).where(
            and_(
                Item.tryout_id == tryout_id,
                Item.website_id == website_id,
                Item.slot == slot,
                Item.level == level,
            )
        )
    )
    existing_items = result.scalars().all()

    if not existing_items:
        return None

    # Single query for all of this user's answers to the candidate items.
    # (Also tolerates duplicate answer rows, which would have made the old
    # per-item scalar_one_or_none() raise.)
    answered_result = await db.execute(
        select(UserAnswer.item_id).where(
            and_(
                UserAnswer.item_id.in_([item.id for item in existing_items]),
                UserAnswer.wp_user_id == wp_user_id,
            )
        )
    )
    answered_ids = {row[0] for row in answered_result.all()}

    # Return the first item the user hasn't answered, preserving DB order.
    for item in existing_items:
        if item.id not in answered_ids:
            logger.info(
                f"Cache hit for tryout={tryout_id}, slot={slot}, level={level}, "
                f"item_id={item.id}, user={wp_user_id}"
            )
            return item

    # All items have been answered by this user
    logger.info(
        f"Cache miss (user answered all) for tryout={tryout_id}, slot={slot}, "
        f"level={level}, user={wp_user_id}"
    )
    return None
|
||||
|
||||
|
||||
async def generate_with_cache_check(
    tryout_id: str,
    slot: int,
    level: Literal["mudah", "sulit"],
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
    ai_model: str = "qwen/qwen-2.5-coder-32b-instruct",
) -> tuple[Optional[Union[Item, GeneratedQuestion]], bool]:
    """
    Generate question with cache checking.

    Checks for a cached item the user hasn't answered; on a cache miss,
    generates a new question via AI unless generation is disabled for the
    tryout. (The previous version duplicated the cache check in both the
    enabled and disabled branches; since the cache is consulted either way,
    it is now done once up front — a cache hit also skips the Tryout lookup.)

    Args:
        tryout_id: Tryout identifier
        slot: Question slot
        level: Target difficulty level
        wp_user_id: WordPress user ID
        website_id: Website identifier
        db: Database session
        ai_model: AI model to use

    Returns:
        Tuple of (item/question or None, is_cached)
    """
    # Cache is consulted regardless of the AI-generation flag, so check once.
    cached_item = await check_cache_reuse(
        tryout_id, slot, level, wp_user_id, website_id, db
    )
    if cached_item:
        return cached_item, True

    # Cache miss - check if AI generation is enabled for this tryout.
    tryout_result = await db.execute(
        select(Tryout).where(
            and_(
                Tryout.tryout_id == tryout_id,
                Tryout.website_id == website_id,
            )
        )
    )
    tryout = tryout_result.scalar_one_or_none()

    if tryout and not tryout.ai_generation_enabled:
        logger.info(f"AI generation disabled for tryout={tryout_id}")
        return None, False

    # Get basis item (sedang level at same slot).
    basis_result = await db.execute(
        select(Item).where(
            and_(
                Item.tryout_id == tryout_id,
                Item.website_id == website_id,
                Item.slot == slot,
                Item.level == "sedang",
            )
        ).limit(1)
    )
    basis_item = basis_result.scalar_one_or_none()

    if not basis_item:
        logger.error(
            f"No basis item found for tryout={tryout_id}, slot={slot}"
        )
        return None, False

    # Generate new question
    generated = await generate_question(basis_item, level, ai_model)

    if not generated:
        logger.error(
            f"Failed to generate question for tryout={tryout_id}, slot={slot}, level={level}"
        )
        return None, False

    return generated, False
|
||||
|
||||
|
||||
async def save_ai_question(
    generated_data: GeneratedQuestion,
    tryout_id: str,
    website_id: int,
    basis_item_id: int,
    slot: int,
    level: Literal["mudah", "sedang", "sulit"],
    ai_model: str,
    db: AsyncSession,
) -> Optional[int]:
    """
    Save AI-generated question to database.

    Flushes (does not commit) so the caller keeps control of the
    transaction boundary.

    Args:
        generated_data: Generated question data
        tryout_id: Tryout identifier
        website_id: Website identifier
        basis_item_id: Basis item ID
        slot: Question slot
        level: Difficulty level
        ai_model: AI model used
        db: Database session

    Returns:
        Created item ID or None if failed
    """
    try:
        new_item = Item(
            tryout_id=tryout_id,
            website_id=website_id,
            slot=slot,
            level=level,
            stem=generated_data.stem,
            options=generated_data.options,
            correct_answer=generated_data.correct,
            explanation=generated_data.explanation,
            generated_by="ai",
            ai_model=ai_model,
            basis_item_id=basis_item_id,
            # Calibration fields start empty; they are populated later once
            # enough user responses have been collected.
            calibrated=False,
            ctt_p=None,
            ctt_bobot=None,
            ctt_category=None,
            irt_b=None,
            irt_se=None,
            calibration_sample_size=0,
        )

        db.add(new_item)
        await db.flush()  # Get the ID without committing

        logger.info(
            f"Saved AI-generated item: id={new_item.id}, tryout={tryout_id}, "
            f"slot={slot}, level={level}, model={ai_model}"
        )

        return new_item.id

    except Exception as e:
        # logger.exception keeps the same message but also records the
        # traceback, which the previous logger.error(f"...") discarded.
        logger.exception(f"Failed to save AI-generated question: {e}")
        return None
|
||||
|
||||
|
||||
async def get_ai_stats(db: AsyncSession) -> Dict[str, Any]:
    """
    Get AI generation statistics.

    Args:
        db: Database session

    Returns:
        Statistics dictionary with totals and a per-model breakdown
    """
    # Total count of AI-generated items.
    total_result = await db.execute(
        select(func.count(Item.id)).where(Item.generated_by == "ai")
    )
    total_ai_items = total_result.scalar() or 0

    # Per-model breakdown (rows with a NULL model are excluded).
    per_model_result = await db.execute(
        select(Item.ai_model, func.count(Item.id))
        .where(Item.generated_by == "ai")
        .where(Item.ai_model.isnot(None))
        .group_by(Item.ai_model)
    )
    items_by_model = {model: count for model, count in per_model_result.all()}

    # Cache hit metrics are not tracked yet; placeholder zeros for now.
    return {
        "total_ai_items": total_ai_items,
        "items_by_model": items_by_model,
        "cache_hit_rate": 0.0,
        "total_cache_hits": 0,
        "total_requests": 0,
    }
|
||||
|
||||
|
||||
def validate_ai_model(model: str) -> bool:
    """
    Validate that the AI model is supported.

    Args:
        model: AI model identifier

    Returns:
        True if model is supported
    """
    # Membership in the SUPPORTED_MODELS registry is the sole criterion.
    return SUPPORTED_MODELS.get(model) is not None
|
||||
Reference in New Issue
Block a user