Files
yellow-bank-soal/app/routers/import_export.py
2026-04-30 11:35:56 +07:00

426 lines
13 KiB
Python

"""
Import/Export API router for migration and snapshot ingestion.
Endpoints:
- POST /api/v1/import-export/preview: Preview Excel import without saving
- POST /api/v1/import-export/questions: Import questions from Excel to database
- GET /api/v1/import-export/export/questions: Export questions to Excel file
- POST /api/v1/import-export/tryout-json/preview: Preview Sejoli tryout JSON import
- POST /api/v1/import-export/tryout-json: Import Sejoli tryout JSON as read-only snapshot
"""
import os
import tempfile
import json
from fastapi import APIRouter, Depends, File, Form, HTTPException, Request, UploadFile, status
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.auth import AuthContext, get_auth_context, require_website_auth
from app.core.rate_limit import enforce_rate_limit
from app.database import get_db
from app.models import Website
from app.services.excel_import import (
bulk_insert_items,
export_questions_to_excel,
parse_excel_import,
validate_excel_structure,
)
from app.services.tryout_json_import import (
TryoutImportError,
import_tryout_json_snapshot,
preview_tryout_json_import,
)
router = APIRouter(prefix="/api/v1/import-export", tags=["import-export"])
async def ensure_website_exists(
    website_id: int,
    db: AsyncSession,
) -> None:
    """Raise a 404 HTTPException when the given website id has no database row."""
    if await db.get(Website, website_id) is not None:
        return
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=(
            f"Website {website_id} not found. Website registration is stored in the database, "
            "not in .env."
        ),
    )
@router.post(
    "/preview",
    summary="Preview Excel import",
    description="Parse Excel file and return preview without saving to database.",
)
async def preview_import(
    request: Request,
    file: UploadFile = File(..., description="Excel file (.xlsx)"),
    auth: AuthContext = Depends(get_auth_context),
) -> dict:
    """
    Preview Excel import without saving to database.

    Args:
        request: Incoming request (used for rate limiting).
        file: Excel file upload (.xlsx format).
        auth: Auth context; must resolve to an admin/system_admin website role.

    Returns:
        Dict with:
        - items_count: Number of items parsed
        - preview: List of item previews (first 5 items when there are no errors)
        - validation_errors: List of validation errors if any
        - has_errors: Whether any validation errors were found

    Raises:
        HTTPException: If file format is invalid or parsing fails.
    """
    website_id = require_website_auth(auth, allowed_roles={"admin", "system_admin"})
    enforce_rate_limit(
        request,
        scope="import.preview",
        max_requests=30,
        window_seconds=300,
    )
    # Validate file format
    if not file.filename or not file.filename.lower().endswith('.xlsx'):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="File must be .xlsx format",
        )
    # Save uploaded file to a temporary location. Record the path as soon as
    # the file exists (delete=False) so it is removed even if the write fails.
    temp_file_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
            temp_file_path = temp_file.name
            content = await file.read()
            temp_file.write(content)
    except Exception as e:
        if temp_file_path and os.path.exists(temp_file_path):
            os.unlink(temp_file_path)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to save uploaded file: {str(e)}",
        ) from e
    try:
        # Validate Excel structure before attempting a full parse
        validation = validate_excel_structure(temp_file_path)
        if not validation["valid"]:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "error": "Invalid Excel structure",
                    "validation_errors": validation["errors"],
                },
            )
        # Parse Excel (tryout_id is optional for preview)
        tryout_id = "preview"  # Use dummy tryout_id for preview
        result = parse_excel_import(
            temp_file_path,
            website_id=website_id,
            tryout_id=tryout_id,
        )
        if result["validation_errors"]:
            return {
                "items_count": result["items_count"],
                "preview": result["items"],
                "validation_errors": result["validation_errors"],
                "has_errors": True,
            }
        # Return limited preview (first 5 items)
        return {
            "items_count": result["items_count"],
            "preview": result["items"][:5],
            "validation_errors": [],
            "has_errors": False,
        }
    finally:
        # Clean up temporary file
        if os.path.exists(temp_file_path):
            os.unlink(temp_file_path)
@router.post(
    "/questions",
    summary="Import questions from Excel",
    description="Parse Excel file and import questions to database with 100% data integrity.",
)
async def import_questions(
    request: Request,
    file: UploadFile = File(..., description="Excel file (.xlsx)"),
    auth: AuthContext = Depends(get_auth_context),
    tryout_id: str = Form(..., description="Tryout identifier"),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """
    Import questions from Excel to database.

    Validates file format, parses Excel content, checks for duplicates,
    and performs bulk insert with rollback on error.

    Args:
        request: Incoming request (used for rate limiting).
        file: Excel file upload (.xlsx format).
        auth: Auth context; must resolve to an admin/system_admin website role.
        tryout_id: Tryout identifier.
        db: Async database session.

    Returns:
        Dict with:
        - message: Human-readable result summary
        - imported: Number of items successfully imported
        - duplicates: Number of duplicate items skipped

    Raises:
        HTTPException: If file format is invalid, validation fails, or import
            fails. A 409 is raised when the insert completed but one or more
            duplicates were skipped; counts are reported inside ``detail``.
    """
    website_id = require_website_auth(auth, allowed_roles={"admin", "system_admin"})
    enforce_rate_limit(
        request,
        scope="import.questions",
        max_requests=20,
        window_seconds=300,
    )
    # Validate file format
    if not file.filename or not file.filename.lower().endswith('.xlsx'):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="File must be .xlsx format",
        )
    # Save uploaded file to a temporary location. Record the path as soon as
    # the file exists (delete=False) so it is removed even if the write fails.
    temp_file_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
            temp_file_path = temp_file.name
            content = await file.read()
            temp_file.write(content)
    except Exception as e:
        if temp_file_path and os.path.exists(temp_file_path):
            os.unlink(temp_file_path)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to save uploaded file: {str(e)}",
        ) from e
    try:
        # Validate Excel structure before attempting a full parse
        validation = validate_excel_structure(temp_file_path)
        if not validation["valid"]:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "error": "Invalid Excel structure",
                    "validation_errors": validation["errors"],
                },
            )
        # Parse Excel
        result = parse_excel_import(
            temp_file_path,
            website_id=website_id,
            tryout_id=tryout_id,
        )
        # Check for validation errors
        if result["validation_errors"]:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={
                    "error": "Validation failed",
                    "validation_errors": result["validation_errors"],
                },
            )
        # Check if items were parsed
        if result["items_count"] == 0:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="No items found in Excel file",
            )
        # Bulk insert items
        insert_result = await bulk_insert_items(result["items"], db)
        # Check for insertion errors
        if insert_result["errors"]:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "error": "Import failed",
                    "errors": insert_result["errors"],
                },
            )
        # Duplicates are surfaced as 409 even though the non-duplicate rows
        # were inserted; callers read imported/duplicates from the detail body.
        if insert_result["duplicate_count"] > 0:
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail={
                    "message": f"Import completed with {insert_result['duplicate_count']} duplicate(s) skipped",
                    "imported": insert_result["inserted_count"],
                    "duplicates": insert_result["duplicate_count"],
                },
            )
        return {
            "message": "Import successful",
            "imported": insert_result["inserted_count"],
            "duplicates": insert_result["duplicate_count"],
        }
    finally:
        # Clean up temporary file
        if os.path.exists(temp_file_path):
            os.unlink(temp_file_path)
@router.get(
    "/export/questions",
    summary="Export questions to Excel",
    description="Export questions for a tryout to Excel file in standardized format.",
)
async def export_questions(
    tryout_id: str,
    auth: AuthContext = Depends(get_auth_context),
    db: AsyncSession = Depends(get_db),
) -> FileResponse:
    """
    Export questions to Excel file.

    Creates Excel file with standardized format:
    - Row 2: KUNCI (answer key)
    - Row 4: TK (p-values)
    - Row 5: BOBOT (weights)
    - Rows 6+: Question data

    Args:
        tryout_id: Tryout identifier.
        auth: Auth context; must resolve to an admin/system_admin website role.
        db: Async database session.

    Returns:
        FileResponse with Excel file.

    Raises:
        HTTPException: 404 if the tryout has no questions, 500 if export fails.
    """
    website_id = require_website_auth(auth, allowed_roles={"admin", "system_admin"})
    try:
        # Export questions to Excel
        output_path = await export_questions_to_excel(
            tryout_id=tryout_id,
            website_id=website_id,
            db=db,
        )
        # NOTE(review): output_path is never deleted after the response is
        # streamed — presumably the export service manages its own output
        # directory; confirm cleanup to avoid accumulating files.
        filename = f"tryout_{tryout_id}_questions.xlsx"
        return FileResponse(
            path=output_path,
            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            filename=filename,
        )
    except ValueError as e:
        # The export service signals a missing/empty tryout via ValueError.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e),
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Export failed: {str(e)}",
        ) from e
@router.post(
    "/tryout-json/preview",
    summary="Preview Sejoli tryout JSON import",
    description="Parse a Sejoli tryout export JSON file and show snapshot diff without writing to database.",
)
async def preview_tryout_json(
    request: Request,
    file: UploadFile = File(..., description="Sejoli tryout export JSON"),
    auth: AuthContext = Depends(get_auth_context),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """
    Preview a Sejoli tryout JSON import without writing to the database.

    Args:
        request: Incoming request (used for rate limiting).
        file: Sejoli tryout export JSON upload.
        auth: Auth context; must resolve to an admin/system_admin website role.
        db: Async database session.

    Returns:
        Preview/diff dict produced by ``preview_tryout_json_import``.

    Raises:
        HTTPException: 400 for a non-.json filename, undecodable or invalid
            JSON, or an import-level validation error; 404 if the website
            row does not exist.
    """
    website_id = require_website_auth(auth, allowed_roles={"admin", "system_admin"})
    enforce_rate_limit(
        request,
        scope="import.tryout_json_preview",
        max_requests=30,
        window_seconds=300,
    )
    if not file.filename or not file.filename.lower().endswith(".json"):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="File must be .json format",
        )
    await ensure_website_exists(website_id, db)
    try:
        # UnicodeDecodeError is caught too: a non-UTF-8 upload is a client
        # error (400), not an unhandled 500.
        payload = json.loads((await file.read()).decode("utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid JSON file: {str(e)}",
        ) from e
    try:
        return await preview_tryout_json_import(payload, website_id, db)
    except TryoutImportError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e),
        ) from e
@router.post(
    "/tryout-json",
    summary="Import Sejoli tryout JSON snapshot",
    description="Store Sejoli tryout export JSON as read-only snapshot data and upsert normalized reference questions.",
)
async def import_tryout_json(
    request: Request,
    file: UploadFile = File(..., description="Sejoli tryout export JSON"),
    auth: AuthContext = Depends(get_auth_context),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """
    Import a Sejoli tryout JSON export as a read-only snapshot.

    Args:
        request: Incoming request (used for rate limiting).
        file: Sejoli tryout export JSON upload.
        auth: Auth context; must resolve to an admin/system_admin website role.
        db: Async database session.

    Returns:
        Result dict produced by ``import_tryout_json_snapshot``.

    Raises:
        HTTPException: 400 for a non-.json filename, undecodable or invalid
            JSON, or an import-level validation error; 404 if the website
            row does not exist.
    """
    website_id = require_website_auth(auth, allowed_roles={"admin", "system_admin"})
    enforce_rate_limit(
        request,
        scope="import.tryout_json",
        max_requests=20,
        window_seconds=300,
    )
    if not file.filename or not file.filename.lower().endswith(".json"):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="File must be .json format",
        )
    await ensure_website_exists(website_id, db)
    try:
        # UnicodeDecodeError is caught too: a non-UTF-8 upload is a client
        # error (400), not an unhandled 500.
        payload = json.loads((await file.read()).decode("utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid JSON file: {str(e)}",
        ) from e
    try:
        return await import_tryout_json_snapshot(payload, website_id, db)
    except TryoutImportError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e),
        ) from e