first commit
This commit is contained in:
595
app/services/ai_generation.py
Normal file
595
app/services/ai_generation.py
Normal file
@@ -0,0 +1,595 @@
|
||||
"""
|
||||
AI Question Generation Service.
|
||||
|
||||
Handles OpenRouter API integration for generating question variants.
|
||||
Implements caching, user-level reuse checking, and prompt engineering.
|
||||
"""
|
||||
|
||||
import asyncio
import json
import logging
import re
from typing import Any, Dict, Literal, Optional, Union

import httpx
from sqlalchemy import and_, func, select
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.config import get_settings
from app.models.item import Item
from app.models.tryout import Tryout
from app.models.user_answer import UserAnswer
from app.schemas.ai import GeneratedQuestion
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
settings = get_settings()
|
||||
|
||||
# OpenRouter API configuration
|
||||
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
|
||||
|
||||
# Supported AI models
|
||||
SUPPORTED_MODELS = {
|
||||
"qwen/qwen-2.5-coder-32b-instruct": "Qwen 2.5 Coder 32B",
|
||||
"meta-llama/llama-3.3-70b-instruct": "Llama 3.3 70B",
|
||||
}
|
||||
|
||||
# Level mapping for prompts
|
||||
LEVEL_DESCRIPTIONS = {
|
||||
"mudah": "easier (simpler concepts, more straightforward calculations)",
|
||||
"sedang": "medium difficulty",
|
||||
"sulit": "harder (more complex concepts, multi-step reasoning)",
|
||||
}
|
||||
|
||||
|
||||
def get_prompt_template(
    basis_stem: str,
    basis_options: Dict[str, str],
    basis_correct: str,
    basis_explanation: Optional[str],
    target_level: Literal["mudah", "sulit"],
) -> str:
    """
    Build the standardized prompt used to ask the AI for a question variant.

    Args:
        basis_stem: The basis question stem
        basis_options: The basis question options, keyed by letter (A-D)
        basis_correct: The basis correct answer letter
        basis_explanation: The basis explanation, if any
        target_level: Target difficulty level

    Returns:
        Formatted prompt string ready to send to the model
    """
    # Fall back to the raw level string if it has no registered description.
    difficulty = LEVEL_DESCRIPTIONS.get(target_level, target_level)

    rendered_options = "\n".join(
        f" {label}: {text}" for label, text in basis_options.items()
    )

    if basis_explanation:
        explanation_line = f"Explanation: {basis_explanation}"
    else:
        explanation_line = "Explanation: (not provided)"

    return f"""You are an educational content creator specializing in creating assessment questions.

Given a "Sedang" (medium difficulty) question, generate a new question at a different difficulty level.

BASIS QUESTION (Sedang level):
Question: {basis_stem}
Options:
{rendered_options}
Correct Answer: {basis_correct}
{explanation_line}

TASK:
Generate 1 new question that is {difficulty} than the basis question above.

REQUIREMENTS:
1. Keep the SAME topic/subject matter as the basis question
2. Use similar context and terminology
3. Create exactly 4 answer options (A, B, C, D)
4. Only ONE correct answer
5. Include a clear explanation of why the correct answer is correct
6. Make the question noticeably {difficulty} - not just a minor variation

OUTPUT FORMAT:
Return ONLY a valid JSON object with this exact structure (no markdown, no code blocks):
{{"stem": "Your question text here", "options": {{"A": "Option A text", "B": "Option B text", "C": "Option C text", "D": "Option D text"}}, "correct": "A", "explanation": "Explanation text here"}}

Remember: The correct field must be exactly "A", "B", "C", or "D"."""
|
||||
|
||||
|
||||
def parse_ai_response(response_text: str) -> Optional[GeneratedQuestion]:
    """
    Parse the raw AI response into a GeneratedQuestion.

    Tolerates several response shapes: fenced ```json blocks, plain fenced
    blocks, a bare JSON object embedded in prose, or the whole response
    being JSON.

    Args:
        response_text: Raw AI response text

    Returns:
        GeneratedQuestion if parsing successful, None otherwise
    """
    if not response_text:
        return None

    cleaned = response_text.strip()

    # Extraction patterns, tried most-specific first.
    candidate_patterns = (
        r"```json\s*([\s\S]*?)\s*```",  # ```json ... ```
        r"```\s*([\s\S]*?)\s*```",      # ``` ... ```
        r"(\{[\s\S]*\})",               # Raw JSON object
    )

    for pattern in candidate_patterns:
        found = re.search(pattern, cleaned)
        if not found:
            continue
        try:
            parsed = json.loads(found.group(1).strip())
        except json.JSONDecodeError:
            # Matched text wasn't valid JSON; try the next pattern.
            continue
        return validate_and_create_question(parsed)

    # Last resort: the whole response might itself be JSON.
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        logger.warning(f"Failed to parse AI response: {cleaned[:200]}...")
        return None
    return validate_and_create_question(parsed)
|
||||
|
||||
|
||||
def validate_and_create_question(data: Dict[str, Any]) -> Optional[GeneratedQuestion]:
    """
    Validate parsed JSON data and build a GeneratedQuestion from it.

    Checks that the required fields exist, that options is a dict covering
    A-D, and that the correct answer is one of those letters.

    Args:
        data: Parsed JSON data

    Returns:
        GeneratedQuestion if valid, None otherwise
    """
    if any(field not in data for field in ("stem", "options", "correct")):
        logger.warning(f"Missing required fields in AI response: {data.keys()}")
        return None

    options = data.get("options", {})
    if not isinstance(options, dict):
        logger.warning("Options is not a dictionary")
        return None

    expected_letters = {"A", "B", "C", "D"}
    missing = expected_letters - set(options.keys())
    if missing:
        logger.warning(f"Missing required options: {missing}")
        return None

    # Normalize the answer letter to upper case before checking it.
    answer = str(data.get("correct", "")).upper()
    if answer not in expected_letters:
        logger.warning(f"Invalid correct answer: {answer}")
        return None

    return GeneratedQuestion(
        stem=str(data["stem"]).strip(),
        options={label: str(text).strip() for label, text in options.items()},
        correct=answer,
        # Empty explanations are stored as None rather than "".
        explanation=str(data.get("explanation", "")).strip() or None,
    )
|
||||
|
||||
|
||||
async def call_openrouter_api(
    prompt: str,
    model: str,
    max_retries: int = 3,
) -> Optional[str]:
    """
    Call OpenRouter API to generate question.

    Retries up to ``max_retries`` times: HTTP 429 backs off exponentially,
    timeouts retry immediately, any other HTTP error aborts. A single HTTP
    client (and its connection pool) is reused across all attempts instead
    of being recreated per retry.

    Args:
        prompt: The prompt to send
        model: AI model to use
        max_retries: Maximum retry attempts

    Returns:
        API response text or None if failed
    """
    if not settings.OPENROUTER_API_KEY:
        logger.error("OPENROUTER_API_KEY not configured")
        return None

    if model not in SUPPORTED_MODELS:
        logger.error(f"Unsupported AI model: {model}")
        return None

    headers = {
        "Authorization": f"Bearer {settings.OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://github.com/irt-bank-soal",
        "X-Title": "IRT Bank Soal",
    }

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": prompt,
            }
        ],
        "max_tokens": 2000,
        "temperature": 0.7,
    }

    timeout = httpx.Timeout(settings.OPENROUTER_TIMEOUT)

    # One client for all attempts: opening a fresh AsyncClient per retry
    # (as before) throws away the connection pool each time.
    async with httpx.AsyncClient(timeout=timeout) as client:
        for attempt in range(max_retries):
            try:
                response = await client.post(
                    OPENROUTER_API_URL,
                    headers=headers,
                    json=payload,
                )

                if response.status_code == 200:
                    data = response.json()
                    choices = data.get("choices", [])
                    if choices:
                        message = choices[0].get("message", {})
                        return message.get("content")
                    logger.warning("No choices in OpenRouter response")
                    return None

                elif response.status_code == 429:
                    # Rate limited - wait and retry with exponential backoff
                    # (asyncio is now imported at module level, not inside
                    # the loop as before).
                    logger.warning(f"Rate limited, attempt {attempt + 1}/{max_retries}")
                    if attempt < max_retries - 1:
                        await asyncio.sleep(2 ** attempt)
                        continue
                    return None

                else:
                    logger.error(
                        f"OpenRouter API error: {response.status_code} - {response.text}"
                    )
                    return None

            except httpx.TimeoutException:
                logger.warning(f"OpenRouter timeout, attempt {attempt + 1}/{max_retries}")
                if attempt < max_retries - 1:
                    continue
                return None

            except Exception as e:
                # Catch-all boundary: network/parse failures are logged and
                # retried rather than propagated to the caller.
                logger.error(f"OpenRouter API call failed: {e}")
                if attempt < max_retries - 1:
                    continue
                return None

    # All retries exhausted.
    return None
|
||||
|
||||
|
||||
async def generate_question(
    basis_item: Item,
    target_level: Literal["mudah", "sulit"],
    ai_model: str = "qwen/qwen-2.5-coder-32b-instruct",
) -> Optional[GeneratedQuestion]:
    """
    Generate a new question variant from a basis item.

    Builds the prompt, calls OpenRouter, and parses the response.

    Args:
        basis_item: The basis item (must be sedang level)
        target_level: Target difficulty level
        ai_model: AI model to use

    Returns:
        GeneratedQuestion if successful, None otherwise
    """
    prompt = get_prompt_template(
        basis_stem=basis_item.stem,
        basis_options=basis_item.options,
        basis_correct=basis_item.correct_answer,
        basis_explanation=basis_item.explanation,
        target_level=target_level,
    )

    raw_response = await call_openrouter_api(prompt, ai_model)
    if not raw_response:
        logger.error("No response from OpenRouter API")
        return None

    question = parse_ai_response(raw_response)
    if not question:
        logger.error("Failed to parse AI response")
        return None

    return question
|
||||
|
||||
|
||||
async def check_cache_reuse(
    tryout_id: str,
    slot: int,
    level: str,
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
) -> Optional[Item]:
    """
    Check if there's a cached item that the user hasn't answered yet.

    Queries the DB for existing items matching (tryout_id, slot, level),
    then fetches ALL of this user's answers to those items in one query
    (the previous version issued one UserAnswer query per candidate item —
    a classic N+1 pattern).

    Args:
        tryout_id: Tryout identifier
        slot: Question slot
        level: Difficulty level
        wp_user_id: WordPress user ID
        website_id: Website identifier
        db: Database session

    Returns:
        Cached item if found and user hasn't answered, None otherwise
    """
    # Find existing items at this slot/level
    result = await db.execute(
        select(Item).where(
            and_(
                Item.tryout_id == tryout_id,
                Item.website_id == website_id,
                Item.slot == slot,
                Item.level == level,
            )
        )
    )
    existing_items = result.scalars().all()

    if not existing_items:
        return None

    # Single query for all of this user's answers to the candidate items.
    # (Also tolerates duplicate answer rows, which would have made the old
    # per-item scalar_one_or_none() raise.)
    answered_result = await db.execute(
        select(UserAnswer.item_id).where(
            and_(
                UserAnswer.item_id.in_([item.id for item in existing_items]),
                UserAnswer.wp_user_id == wp_user_id,
            )
        )
    )
    answered_ids = {row[0] for row in answered_result.all()}

    # Return the first item the user hasn't answered, preserving DB order.
    for item in existing_items:
        if item.id not in answered_ids:
            logger.info(
                f"Cache hit for tryout={tryout_id}, slot={slot}, level={level}, "
                f"item_id={item.id}, user={wp_user_id}"
            )
            return item

    # All items have been answered by this user
    logger.info(
        f"Cache miss (user answered all) for tryout={tryout_id}, slot={slot}, "
        f"level={level}, user={wp_user_id}"
    )
    return None
|
||||
|
||||
|
||||
async def generate_with_cache_check(
    tryout_id: str,
    slot: int,
    level: Literal["mudah", "sulit"],
    wp_user_id: str,
    website_id: int,
    db: AsyncSession,
    ai_model: str = "qwen/qwen-2.5-coder-32b-instruct",
) -> tuple[Optional[Union[Item, GeneratedQuestion]], bool]:
    """
    Generate question with cache checking.

    Checks for a cached item the user hasn't answered; on a cache miss,
    generates a new question via AI unless generation is disabled for the
    tryout. (The previous version duplicated the cache check in both the
    enabled and disabled branches; since the cache is consulted either way,
    it is now done once up front — a cache hit also skips the Tryout lookup.)

    Args:
        tryout_id: Tryout identifier
        slot: Question slot
        level: Target difficulty level
        wp_user_id: WordPress user ID
        website_id: Website identifier
        db: Database session
        ai_model: AI model to use

    Returns:
        Tuple of (item/question or None, is_cached)
    """
    # Cache is consulted regardless of the AI-generation flag, so check once.
    cached_item = await check_cache_reuse(
        tryout_id, slot, level, wp_user_id, website_id, db
    )
    if cached_item:
        return cached_item, True

    # Cache miss - check if AI generation is enabled for this tryout.
    tryout_result = await db.execute(
        select(Tryout).where(
            and_(
                Tryout.tryout_id == tryout_id,
                Tryout.website_id == website_id,
            )
        )
    )
    tryout = tryout_result.scalar_one_or_none()

    if tryout and not tryout.ai_generation_enabled:
        logger.info(f"AI generation disabled for tryout={tryout_id}")
        return None, False

    # Get basis item (sedang level at same slot).
    basis_result = await db.execute(
        select(Item).where(
            and_(
                Item.tryout_id == tryout_id,
                Item.website_id == website_id,
                Item.slot == slot,
                Item.level == "sedang",
            )
        ).limit(1)
    )
    basis_item = basis_result.scalar_one_or_none()

    if not basis_item:
        logger.error(
            f"No basis item found for tryout={tryout_id}, slot={slot}"
        )
        return None, False

    # Generate new question
    generated = await generate_question(basis_item, level, ai_model)

    if not generated:
        logger.error(
            f"Failed to generate question for tryout={tryout_id}, slot={slot}, level={level}"
        )
        return None, False

    return generated, False
|
||||
|
||||
|
||||
async def save_ai_question(
    generated_data: GeneratedQuestion,
    tryout_id: str,
    website_id: int,
    basis_item_id: int,
    slot: int,
    level: Literal["mudah", "sedang", "sulit"],
    ai_model: str,
    db: AsyncSession,
) -> Optional[int]:
    """
    Save AI-generated question to database.

    Flushes (does not commit) so the caller keeps control of the
    transaction boundary.

    Args:
        generated_data: Generated question data
        tryout_id: Tryout identifier
        website_id: Website identifier
        basis_item_id: Basis item ID
        slot: Question slot
        level: Difficulty level
        ai_model: AI model used
        db: Database session

    Returns:
        Created item ID or None if failed
    """
    try:
        new_item = Item(
            tryout_id=tryout_id,
            website_id=website_id,
            slot=slot,
            level=level,
            stem=generated_data.stem,
            options=generated_data.options,
            correct_answer=generated_data.correct,
            explanation=generated_data.explanation,
            generated_by="ai",
            ai_model=ai_model,
            basis_item_id=basis_item_id,
            # Calibration fields start empty; they are populated later once
            # enough user responses have been collected.
            calibrated=False,
            ctt_p=None,
            ctt_bobot=None,
            ctt_category=None,
            irt_b=None,
            irt_se=None,
            calibration_sample_size=0,
        )

        db.add(new_item)
        await db.flush()  # Get the ID without committing

        logger.info(
            f"Saved AI-generated item: id={new_item.id}, tryout={tryout_id}, "
            f"slot={slot}, level={level}, model={ai_model}"
        )

        return new_item.id

    except Exception as e:
        # logger.exception keeps the same message but also records the
        # traceback, which the previous logger.error(f"...") discarded.
        logger.exception(f"Failed to save AI-generated question: {e}")
        return None
|
||||
|
||||
|
||||
async def get_ai_stats(db: AsyncSession) -> Dict[str, Any]:
    """
    Get AI generation statistics.

    Args:
        db: Database session

    Returns:
        Statistics dictionary with totals and a per-model breakdown
    """
    # Total count of AI-generated items.
    total_result = await db.execute(
        select(func.count(Item.id)).where(Item.generated_by == "ai")
    )
    total_ai_items = total_result.scalar() or 0

    # Per-model breakdown (rows with a NULL model are excluded).
    per_model_result = await db.execute(
        select(Item.ai_model, func.count(Item.id))
        .where(Item.generated_by == "ai")
        .where(Item.ai_model.isnot(None))
        .group_by(Item.ai_model)
    )
    items_by_model = {model: count for model, count in per_model_result.all()}

    # Cache hit metrics are not tracked yet; placeholder zeros for now.
    return {
        "total_ai_items": total_ai_items,
        "items_by_model": items_by_model,
        "cache_hit_rate": 0.0,
        "total_cache_hits": 0,
        "total_requests": 0,
    }
|
||||
|
||||
|
||||
def validate_ai_model(model: str) -> bool:
    """
    Validate that the AI model is supported.

    Args:
        model: AI model identifier

    Returns:
        True if model is supported
    """
    # Membership in the SUPPORTED_MODELS registry is the sole criterion.
    return SUPPORTED_MODELS.get(model) is not None
|
||||
Reference in New Issue
Block a user