diff --git a/.env.example b/.env.example index 8c6432e..e1907d3 100644 --- a/.env.example +++ b/.env.example @@ -18,7 +18,8 @@ ADMIN_SESSION_EXPIRE_SECONDS=3600 # OpenRouter (AI Generation) OPENROUTER_API_KEY=your-openrouter-api-key-here -OPENROUTER_MODEL_QWEN=qwen/qwen-2.5-coder-32b-instruct +OPENROUTER_MODEL_QWEN=qwen/qwen2.5-32b-instruct +OPENROUTER_MODEL_CHEAP=mistralai/mistral-small-2603 OPENROUTER_MODEL_LLAMA=meta-llama/llama-3.3-70b-instruct OPENROUTER_TIMEOUT=30 diff --git a/app/core/config.py b/app/core/config.py index cb19a40..73c1e30 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -57,12 +57,16 @@ class Settings(BaseSettings): default="", description="OpenRouter API key for AI generation" ) OPENROUTER_MODEL_QWEN: str = Field( - default="qwen/qwen-2.5-coder-32b-instruct", - description="Qwen model identifier", + default="qwen/qwen2.5-32b-instruct", + description="Balanced Qwen model identifier", + ) + OPENROUTER_MODEL_CHEAP: str = Field( + default="mistralai/mistral-small-2603", + description="Low-cost model identifier", ) OPENROUTER_MODEL_LLAMA: str = Field( default="meta-llama/llama-3.3-70b-instruct", - description="Llama model identifier", + description="Premium Llama model identifier", ) OPENROUTER_TIMEOUT: int = Field(default=30, description="OpenRouter API timeout in seconds") diff --git a/app/routers/ai.py b/app/routers/ai.py index 7815ac6..f65536a 100644 --- a/app/routers/ai.py +++ b/app/routers/ai.py @@ -11,6 +11,7 @@ from fastapi import APIRouter, Depends, HTTPException, status from sqlalchemy import and_, select from sqlalchemy.ext.asyncio import AsyncSession +from app.core.config import get_settings from app.database import get_db from app.models.item import Item from app.schemas.ai import ( @@ -21,6 +22,7 @@ from app.schemas.ai import ( AIStatsResponse, ) from app.services.ai_generation import ( + SUPPORTED_MODELS, generate_question, get_ai_stats, save_ai_question, @@ -28,6 +30,7 @@ from app.services.ai_generation import ( ) 
logger = logging.getLogger(__name__) +settings = get_settings() router = APIRouter(prefix="/admin/ai", tags=["admin", "ai-generation"]) @@ -63,14 +66,15 @@ async def generate_preview( - **basis_item_id**: ID of the sedang-level question to base generation on - **target_level**: Target difficulty (mudah/sulit) - - **ai_model**: OpenRouter model to use (default: qwen/qwen-2.5-coder-32b-instruct) + - **ai_model**: OpenRouter model to use (default: qwen/qwen2.5-32b-instruct) """ # Validate AI model if not validate_ai_model(request.ai_model): + supported = ", ".join(SUPPORTED_MODELS.keys()) raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f"Unsupported AI model: {request.ai_model}. " - f"Supported models: qwen/qwen-2.5-coder-32b-instruct, meta-llama/llama-3.3-70b-instruct", + f"Supported models: {supported}", ) # Fetch basis item @@ -279,14 +283,19 @@ async def list_models() -> dict: return { "models": [ { - "id": "qwen/qwen-2.5-coder-32b-instruct", - "name": "Qwen 2.5 Coder 32B", - "description": "Fast and efficient model for question generation", + "id": settings.OPENROUTER_MODEL_CHEAP, + "name": "Mistral Small 4", + "description": "Cheap and fast option for routine variant generation", }, { - "id": "meta-llama/llama-3.3-70b-instruct", + "id": settings.OPENROUTER_MODEL_QWEN, + "name": "Qwen 2.5 32B Instruct", + "description": "Balanced default for structured soal generation", }, + { + "id": settings.OPENROUTER_MODEL_LLAMA, "name": "Llama 3.3 70B", - "description": "High-quality model with better reasoning", + "description": "Premium fallback when you want better quality over cost", }, ] } diff --git a/app/schemas/ai.py b/app/schemas/ai.py index 199cfb7..5168f21 100644 --- a/app/schemas/ai.py +++ b/app/schemas/ai.py @@ -17,7 +17,7 @@ class AIGeneratePreviewRequest(BaseModel): ..., description="Target difficulty level for generated question" ) ai_model: str = Field( - default="qwen/qwen-2.5-coder-32b-instruct", + default="qwen/qwen2.5-32b-instruct", description="AI model to use for generation", ) @@ -50,7 +50,7 @@ class AISaveRequest(BaseModel): ..., description="Difficulty level" ) ai_model: str = Field( - default="qwen/qwen-2.5-coder-32b-instruct", + default="qwen/qwen2.5-32b-instruct", description="AI model used for generation", ) diff --git a/app/services/ai_generation.py b/app/services/ai_generation.py index 609db7b..8993f89 100644 --- a/app/services/ai_generation.py +++ b/app/services/ai_generation.py @@ -28,8 +28,9 @@ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions" # Supported AI models SUPPORTED_MODELS = { - "qwen/qwen-2.5-coder-32b-instruct": "Qwen 2.5 Coder 32B", - "meta-llama/llama-3.3-70b-instruct": "Llama 3.3 70B", + settings.OPENROUTER_MODEL_CHEAP: "Mistral Small 4 (Cheap / Fast)", + settings.OPENROUTER_MODEL_QWEN: "Qwen 2.5 32B Instruct (Balanced)", + settings.OPENROUTER_MODEL_LLAMA: "Llama 3.3 70B (Premium)", } # Level mapping for prompts @@ -285,7 +286,7 @@ async def generate_question( basis_item: Item, target_level: Literal["mudah", "sulit"], - ai_model: str = "qwen/qwen-2.5-coder-32b-instruct", + ai_model: str = settings.OPENROUTER_MODEL_QWEN, ) -> Optional[GeneratedQuestion]: """ Generate a new question based on a basis item. @@ -401,7 +402,7 @@ async def generate_with_cache_check( wp_user_id: str, website_id: int, db: AsyncSession, - ai_model: str = "qwen/qwen-2.5-coder-32b-instruct", + ai_model: str = settings.OPENROUTER_MODEL_QWEN, ) -> tuple[Optional[Union[Item, GeneratedQuestion]], bool]: """ Generate question with cache checking.