Update AI model defaults and tiers
This commit is contained in:
@@ -18,7 +18,8 @@ ADMIN_SESSION_EXPIRE_SECONDS=3600
|
|||||||
|
|
||||||
# OpenRouter (AI Generation)
|
# OpenRouter (AI Generation)
|
||||||
OPENROUTER_API_KEY=your-openrouter-api-key-here
|
OPENROUTER_API_KEY=your-openrouter-api-key-here
|
||||||
OPENROUTER_MODEL_QWEN=qwen/qwen-2.5-coder-32b-instruct
|
OPENROUTER_MODEL_QWEN=qwen/qwen2.5-32b-instruct
|
||||||
|
OPENROUTER_MODEL_CHEAP=mistralai/mistral-small-2603
|
||||||
OPENROUTER_MODEL_LLAMA=meta-llama/llama-3.3-70b-instruct
|
OPENROUTER_MODEL_LLAMA=meta-llama/llama-3.3-70b-instruct
|
||||||
OPENROUTER_TIMEOUT=30
|
OPENROUTER_TIMEOUT=30
|
||||||
|
|
||||||
|
|||||||
@@ -57,12 +57,16 @@ class Settings(BaseSettings):
|
|||||||
default="", description="OpenRouter API key for AI generation"
|
default="", description="OpenRouter API key for AI generation"
|
||||||
)
|
)
|
||||||
OPENROUTER_MODEL_QWEN: str = Field(
|
OPENROUTER_MODEL_QWEN: str = Field(
|
||||||
default="qwen/qwen-2.5-coder-32b-instruct",
|
default="qwen/qwen2.5-32b-instruct",
|
||||||
description="Qwen model identifier",
|
description="Balanced Qwen model identifier",
|
||||||
|
)
|
||||||
|
OPENROUTER_MODEL_CHEAP: str = Field(
|
||||||
|
default="mistralai/mistral-small-2603",
|
||||||
|
description="Low-cost model identifier",
|
||||||
)
|
)
|
||||||
OPENROUTER_MODEL_LLAMA: str = Field(
|
OPENROUTER_MODEL_LLAMA: str = Field(
|
||||||
default="meta-llama/llama-3.3-70b-instruct",
|
default="meta-llama/llama-3.3-70b-instruct",
|
||||||
description="Llama model identifier",
|
description="Premium Llama model identifier",
|
||||||
)
|
)
|
||||||
OPENROUTER_TIMEOUT: int = Field(default=30, description="OpenRouter API timeout in seconds")
|
OPENROUTER_TIMEOUT: int = Field(default=30, description="OpenRouter API timeout in seconds")
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ from fastapi import APIRouter, Depends, HTTPException, status
|
|||||||
from sqlalchemy import and_, select
|
from sqlalchemy import and_, select
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.core.config import get_settings
|
||||||
from app.database import get_db
|
from app.database import get_db
|
||||||
from app.models.item import Item
|
from app.models.item import Item
|
||||||
from app.schemas.ai import (
|
from app.schemas.ai import (
|
||||||
@@ -21,6 +22,7 @@ from app.schemas.ai import (
|
|||||||
AIStatsResponse,
|
AIStatsResponse,
|
||||||
)
|
)
|
||||||
from app.services.ai_generation import (
|
from app.services.ai_generation import (
|
||||||
|
SUPPORTED_MODELS,
|
||||||
generate_question,
|
generate_question,
|
||||||
get_ai_stats,
|
get_ai_stats,
|
||||||
save_ai_question,
|
save_ai_question,
|
||||||
@@ -28,6 +30,7 @@ from app.services.ai_generation import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
settings = get_settings()
|
||||||
|
|
||||||
router = APIRouter(prefix="/admin/ai", tags=["admin", "ai-generation"])
|
router = APIRouter(prefix="/admin/ai", tags=["admin", "ai-generation"])
|
||||||
|
|
||||||
@@ -63,14 +66,15 @@ async def generate_preview(
|
|||||||
|
|
||||||
- **basis_item_id**: ID of the sedang-level question to base generation on
|
- **basis_item_id**: ID of the sedang-level question to base generation on
|
||||||
- **target_level**: Target difficulty (mudah/sulit)
|
- **target_level**: Target difficulty (mudah/sulit)
|
||||||
- **ai_model**: OpenRouter model to use (default: qwen/qwen-2.5-coder-32b-instruct)
|
- **ai_model**: OpenRouter model to use (default: qwen/qwen2.5-32b-instruct)
|
||||||
"""
|
"""
|
||||||
# Validate AI model
|
# Validate AI model
|
||||||
if not validate_ai_model(request.ai_model):
|
if not validate_ai_model(request.ai_model):
|
||||||
|
supported = ", ".join(SUPPORTED_MODELS.keys())
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
detail=f"Unsupported AI model: {request.ai_model}. "
|
detail=f"Unsupported AI model: {request.ai_model}. "
|
||||||
f"Supported models: qwen/qwen-2.5-coder-32b-instruct, meta-llama/llama-3.3-70b-instruct",
|
f"Supported models: {supported}",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Fetch basis item
|
# Fetch basis item
|
||||||
@@ -279,14 +283,19 @@ async def list_models() -> dict:
|
|||||||
return {
|
return {
|
||||||
"models": [
|
"models": [
|
||||||
{
|
{
|
||||||
"id": "qwen/qwen-2.5-coder-32b-instruct",
|
"id": settings.OPENROUTER_MODEL_CHEAP,
|
||||||
"name": "Qwen 2.5 Coder 32B",
|
"name": "Mistral Small 4",
|
||||||
"description": "Fast and efficient model for question generation",
|
"description": "Cheap and fast option for routine variant generation",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "meta-llama/llama-3.3-70b-instruct",
|
"id": settings.OPENROUTER_MODEL_QWEN,
|
||||||
|
"name": "Qwen 2.5 32B Instruct",
|
||||||
|
"description": "Balanced default for structured soal generation",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": settings.OPENROUTER_MODEL_LLAMA,
|
||||||
"name": "Llama 3.3 70B",
|
"name": "Llama 3.3 70B",
|
||||||
"description": "High-quality model with better reasoning",
|
"description": "Premium fallback when you want better quality over cost",
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ class AIGeneratePreviewRequest(BaseModel):
|
|||||||
..., description="Target difficulty level for generated question"
|
..., description="Target difficulty level for generated question"
|
||||||
)
|
)
|
||||||
ai_model: str = Field(
|
ai_model: str = Field(
|
||||||
default="qwen/qwen-2.5-coder-32b-instruct",
|
default="qwen/qwen2.5-32b-instruct",
|
||||||
description="AI model to use for generation",
|
description="AI model to use for generation",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -50,7 +50,7 @@ class AISaveRequest(BaseModel):
|
|||||||
..., description="Difficulty level"
|
..., description="Difficulty level"
|
||||||
)
|
)
|
||||||
ai_model: str = Field(
|
ai_model: str = Field(
|
||||||
default="qwen/qwen-2.5-coder-32b-instruct",
|
default="qwen/qwen2.5-32b-instruct",
|
||||||
description="AI model used for generation",
|
description="AI model used for generation",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -28,8 +28,9 @@ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
|
|||||||
|
|
||||||
# Supported AI models
|
# Supported AI models
|
||||||
SUPPORTED_MODELS = {
|
SUPPORTED_MODELS = {
|
||||||
"qwen/qwen-2.5-coder-32b-instruct": "Qwen 2.5 Coder 32B",
|
settings.OPENROUTER_MODEL_CHEAP: "Mistral Small 4 (Cheap / Fast)",
|
||||||
"meta-llama/llama-3.3-70b-instruct": "Llama 3.3 70B",
|
settings.OPENROUTER_MODEL_QWEN: "Qwen 2.5 32B Instruct (Balanced)",
|
||||||
|
settings.OPENROUTER_MODEL_LLAMA: "Llama 3.3 70B (Premium)",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Level mapping for prompts
|
# Level mapping for prompts
|
||||||
@@ -285,7 +286,7 @@ async def call_openrouter_api(
|
|||||||
async def generate_question(
|
async def generate_question(
|
||||||
basis_item: Item,
|
basis_item: Item,
|
||||||
target_level: Literal["mudah", "sulit"],
|
target_level: Literal["mudah", "sulit"],
|
||||||
ai_model: str = "qwen/qwen-2.5-coder-32b-instruct",
|
ai_model: str = settings.OPENROUTER_MODEL_QWEN,
|
||||||
) -> Optional[GeneratedQuestion]:
|
) -> Optional[GeneratedQuestion]:
|
||||||
"""
|
"""
|
||||||
Generate a new question based on a basis item.
|
Generate a new question based on a basis item.
|
||||||
@@ -401,7 +402,7 @@ async def generate_with_cache_check(
|
|||||||
wp_user_id: str,
|
wp_user_id: str,
|
||||||
website_id: int,
|
website_id: int,
|
||||||
db: AsyncSession,
|
db: AsyncSession,
|
||||||
ai_model: str = "qwen/qwen-2.5-coder-32b-instruct",
|
ai_model: str = settings.OPENROUTER_MODEL_QWEN,
|
||||||
) -> tuple[Optional[Union[Item, GeneratedQuestion]], bool]:
|
) -> tuple[Optional[Union[Item, GeneratedQuestion]], bool]:
|
||||||
"""
|
"""
|
||||||
Generate question with cache checking.
|
Generate question with cache checking.
|
||||||
|
|||||||
Reference in New Issue
Block a user