Update AI model defaults and tiers

This commit is contained in:
dwindown
2026-04-02 08:04:16 +07:00
parent 6ccfef2af2
commit 51c577be05
5 changed files with 32 additions and 17 deletions

View File

@@ -18,7 +18,8 @@ ADMIN_SESSION_EXPIRE_SECONDS=3600
 # OpenRouter (AI Generation)
 OPENROUTER_API_KEY=your-openrouter-api-key-here
-OPENROUTER_MODEL_QWEN=qwen/qwen-2.5-coder-32b-instruct
+OPENROUTER_MODEL_QWEN=qwen/qwen2.5-32b-instruct
+OPENROUTER_MODEL_CHEAP=mistralai/mistral-small-2603
 OPENROUTER_MODEL_LLAMA=meta-llama/llama-3.3-70b-instruct
 OPENROUTER_TIMEOUT=30

View File

@@ -57,12 +57,16 @@ class Settings(BaseSettings):
         default="", description="OpenRouter API key for AI generation"
     )
     OPENROUTER_MODEL_QWEN: str = Field(
-        default="qwen/qwen-2.5-coder-32b-instruct",
-        description="Qwen model identifier",
+        default="qwen/qwen2.5-32b-instruct",
+        description="Balanced Qwen model identifier",
+    )
+    OPENROUTER_MODEL_CHEAP: str = Field(
+        default="mistralai/mistral-small-2603",
+        description="Low-cost model identifier",
     )
     OPENROUTER_MODEL_LLAMA: str = Field(
         default="meta-llama/llama-3.3-70b-instruct",
-        description="Llama model identifier",
+        description="Premium Llama model identifier",
     )
     OPENROUTER_TIMEOUT: int = Field(default=30, description="OpenRouter API timeout in seconds")

View File

@@ -11,6 +11,7 @@ from fastapi import APIRouter, Depends, HTTPException, status
 from sqlalchemy import and_, select
 from sqlalchemy.ext.asyncio import AsyncSession
+from app.core.config import get_settings
 from app.database import get_db
 from app.models.item import Item
 from app.schemas.ai import (
@@ -21,6 +22,7 @@ from app.schemas.ai import (
     AIStatsResponse,
 )
 from app.services.ai_generation import (
+    SUPPORTED_MODELS,
     generate_question,
     get_ai_stats,
     save_ai_question,
@@ -28,6 +30,7 @@ from app.services.ai_generation import (
 )
 logger = logging.getLogger(__name__)
+settings = get_settings()
 router = APIRouter(prefix="/admin/ai", tags=["admin", "ai-generation"])
@@ -63,14 +66,15 @@ async def generate_preview(
     - **basis_item_id**: ID of the sedang-level question to base generation on
     - **target_level**: Target difficulty (mudah/sulit)
-    - **ai_model**: OpenRouter model to use (default: qwen/qwen-2.5-coder-32b-instruct)
+    - **ai_model**: OpenRouter model to use (default: qwen/qwen2.5-32b-instruct)
     """
     # Validate AI model
     if not validate_ai_model(request.ai_model):
+        supported = ", ".join(SUPPORTED_MODELS.keys())
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=f"Unsupported AI model: {request.ai_model}. "
-            f"Supported models: qwen/qwen-2.5-coder-32b-instruct, meta-llama/llama-3.3-70b-instruct",
+            f"Supported models: {supported}",
         )
     # Fetch basis item
@@ -279,14 +283,19 @@ async def list_models() -> dict:
     return {
         "models": [
             {
-                "id": "qwen/qwen-2.5-coder-32b-instruct",
-                "name": "Qwen 2.5 Coder 32B",
-                "description": "Fast and efficient model for question generation",
+                "id": settings.OPENROUTER_MODEL_CHEAP,
+                "name": "Mistral Small 4",
+                "description": "Cheap and fast option for routine variant generation",
             },
             {
-                "id": "meta-llama/llama-3.3-70b-instruct",
+                "id": settings.OPENROUTER_MODEL_QWEN,
+                "name": "Qwen 2.5 32B Instruct",
+                "description": "Balanced default for structured soal generation",
+            },
+            {
+                "id": settings.OPENROUTER_MODEL_LLAMA,
                 "name": "Llama 3.3 70B",
-                "description": "High-quality model with better reasoning",
+                "description": "Premium fallback when you want better quality over cost",
             },
         ]
     }

View File

@@ -17,7 +17,7 @@ class AIGeneratePreviewRequest(BaseModel):
         ..., description="Target difficulty level for generated question"
     )
     ai_model: str = Field(
-        default="qwen/qwen-2.5-coder-32b-instruct",
+        default="qwen/qwen2.5-32b-instruct",
         description="AI model to use for generation",
     )
@@ -50,7 +50,7 @@ class AISaveRequest(BaseModel):
         ..., description="Difficulty level"
     )
     ai_model: str = Field(
-        default="qwen/qwen-2.5-coder-32b-instruct",
+        default="qwen/qwen2.5-32b-instruct",
         description="AI model used for generation",
     )

View File

@@ -28,8 +28,9 @@ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
 # Supported AI models
 SUPPORTED_MODELS = {
-    "qwen/qwen-2.5-coder-32b-instruct": "Qwen 2.5 Coder 32B",
-    "meta-llama/llama-3.3-70b-instruct": "Llama 3.3 70B",
+    settings.OPENROUTER_MODEL_CHEAP: "Mistral Small 4 (Cheap / Fast)",
+    settings.OPENROUTER_MODEL_QWEN: "Qwen 2.5 32B Instruct (Balanced)",
+    settings.OPENROUTER_MODEL_LLAMA: "Llama 3.3 70B (Premium)",
 }
 # Level mapping for prompts
@@ -285,7 +286,7 @@ async def call_openrouter_api(
 async def generate_question(
     basis_item: Item,
     target_level: Literal["mudah", "sulit"],
-    ai_model: str = "qwen/qwen-2.5-coder-32b-instruct",
+    ai_model: str = settings.OPENROUTER_MODEL_QWEN,
 ) -> Optional[GeneratedQuestion]:
     """
     Generate a new question based on a basis item.
@@ -401,7 +402,7 @@ async def generate_with_cache_check(
     wp_user_id: str,
     website_id: int,
     db: AsyncSession,
-    ai_model: str = "qwen/qwen-2.5-coder-32b-instruct",
+    ai_model: str = settings.OPENROUTER_MODEL_QWEN,
 ) -> tuple[Optional[Union[Item, GeneratedQuestion]], bool]: