diff --git a/.env.example b/.env.example index 8c6432e..e1907d3 100644 --- a/.env.example +++ b/.env.example @@ -18,7 +18,8 @@ ADMIN_SESSION_EXPIRE_SECONDS=3600 # OpenRouter (AI Generation) OPENROUTER_API_KEY=your-openrouter-api-key-here -OPENROUTER_MODEL_QWEN=qwen/qwen-2.5-coder-32b-instruct +OPENROUTER_MODEL_QWEN=qwen/qwen2.5-32b-instruct +OPENROUTER_MODEL_CHEAP=mistralai/mistral-small-2603 OPENROUTER_MODEL_LLAMA=meta-llama/llama-3.3-70b-instruct OPENROUTER_TIMEOUT=30 diff --git a/app/core/config.py b/app/core/config.py index cb19a40..73c1e30 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -57,12 +57,16 @@ class Settings(BaseSettings): default="", description="OpenRouter API key for AI generation" ) OPENROUTER_MODEL_QWEN: str = Field( - default="qwen/qwen-2.5-coder-32b-instruct", - description="Qwen model identifier", + default="qwen/qwen2.5-32b-instruct", + description="Balanced Qwen model identifier", + ) + OPENROUTER_MODEL_CHEAP: str = Field( + default="mistralai/mistral-small-2603", + description="Low-cost model identifier", ) OPENROUTER_MODEL_LLAMA: str = Field( default="meta-llama/llama-3.3-70b-instruct", - description="Llama model identifier", + description="Premium Llama model identifier", ) OPENROUTER_TIMEOUT: int = Field(default=30, description="OpenRouter API timeout in seconds") diff --git a/app/routers/ai.py b/app/routers/ai.py index 7815ac6..f65536a 100644 --- a/app/routers/ai.py +++ b/app/routers/ai.py @@ -11,6 +11,7 @@ from fastapi import APIRouter, Depends, HTTPException, status from sqlalchemy import and_, select from sqlalchemy.ext.asyncio import AsyncSession +from app.core.config import get_settings from app.database import get_db from app.models.item import Item from app.schemas.ai import ( @@ -21,6 +22,7 @@ from app.schemas.ai import ( AIStatsResponse, ) from app.services.ai_generation import ( + SUPPORTED_MODELS, generate_question, get_ai_stats, save_ai_question, @@ -28,6 +30,7 @@ from app.services.ai_generation import ( ) 
logger = logging.getLogger(__name__) +settings = get_settings() router = APIRouter(prefix="/admin/ai", tags=["admin", "ai-generation"]) @@ -63,14 +66,15 @@ async def generate_preview( - **basis_item_id**: ID of the sedang-level question to base generation on - **target_level**: Target difficulty (mudah/sulit) - - **ai_model**: OpenRouter model to use (default: qwen/qwen-2.5-coder-32b-instruct) + - **ai_model**: OpenRouter model to use (default: qwen/qwen2.5-32b-instruct) """ # Validate AI model if not validate_ai_model(request.ai_model): + supported = ", ".join(SUPPORTED_MODELS.keys()) raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f"Unsupported AI model: {request.ai_model}. " - f"Supported models: qwen/qwen-2.5-coder-32b-instruct, meta-llama/llama-3.3-70b-instruct", + f"Supported models: {supported}", ) # Fetch basis item @@ -279,14 +283,19 @@ async def list_models() -> dict: return { "models": [ { - "id": "qwen/qwen-2.5-coder-32b-instruct", - "name": "Qwen 2.5 Coder 32B", - "description": "Fast and efficient model for question generation", + "id": settings.OPENROUTER_MODEL_CHEAP, + "name": "Mistral Small 4", + "description": "Cheap and fast option for routine variant generation", }, { - "id": "meta-llama/llama-3.3-70b-instruct", + "id": settings.OPENROUTER_MODEL_QWEN, + "name": "Qwen 2.5 32B Instruct", + "description": "Balanced default for structured soal generation", }, + { + "id": settings.OPENROUTER_MODEL_LLAMA, "name": "Llama 3.3 70B", - "description": "High-quality model with better reasoning", + "description": "Premium fallback when you want better quality over cost", }, ] } diff --git a/app/schemas/ai.py b/app/schemas/ai.py index 199cfb7..5168f21 100644 --- a/app/schemas/ai.py +++ b/app/schemas/ai.py @@ -17,7 +17,7 @@ class AIGeneratePreviewRequest(BaseModel): ..., description="Target difficulty level for generated question" ) ai_model: str = Field( - default="qwen/qwen-2.5-coder-32b-instruct", + default="qwen/qwen2.5-32b-instruct", description="AI model to use for generation", ) @@ -50,7 +50,7 @@ class AISaveRequest(BaseModel): ..., description="Difficulty level" ) ai_model: str = Field( - default="qwen/qwen-2.5-coder-32b-instruct", + default="qwen/qwen2.5-32b-instruct", description="AI model used for generation", ) diff --git a/app/services/ai_generation.py b/app/services/ai_generation.py index 609db7b..8993f89 100644 --- a/app/services/ai_generation.py +++ b/app/services/ai_generation.py @@ -28,8 +28,9 @@ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions" # Supported AI models SUPPORTED_MODELS = { - "qwen/qwen-2.5-coder-32b-instruct": "Qwen 2.5 Coder 32B", - "meta-llama/llama-3.3-70b-instruct": "Llama 3.3 70B", + settings.OPENROUTER_MODEL_CHEAP: "Mistral Small 4 (Cheap / Fast)", + settings.OPENROUTER_MODEL_QWEN: "Qwen 2.5 32B Instruct (Balanced)", + settings.OPENROUTER_MODEL_LLAMA: "Llama 3.3 70B (Premium)", } # Level mapping for prompts @@ -285,7 +286,7 @@ async def generate_question( basis_item: Item, target_level: Literal["mudah", "sulit"], - ai_model: str = "qwen/qwen-2.5-coder-32b-instruct", + ai_model: str = settings.OPENROUTER_MODEL_QWEN, ) -> Optional[GeneratedQuestion]: """ Generate a new question based on a basis item. @@ -401,7 +402,7 @@ async def generate_with_cache_check( wp_user_id: str, website_id: int, db: AsyncSession, - ai_model: str = "qwen/qwen-2.5-coder-32b-instruct", + ai_model: str = settings.OPENROUTER_MODEL_QWEN, ) -> tuple[Optional[Union[Item, GeneratedQuestion]], bool]: """ Generate question with cache checking.