Add Sejoli tryout JSON snapshot importer
This commit is contained in:
118
alembic/versions/20260402_000002_tryout_json_snapshots.py
Normal file
118
alembic/versions/20260402_000002_tryout_json_snapshots.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""add tryout JSON snapshot tables
|
||||
|
||||
Revision ID: 20260402_000002
|
||||
Revises: 20260331_000001
|
||||
Create Date: 2026-04-02 11:30:00
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# Alembic revision identifiers; this migration chains onto 20260331_000001.
revision: str = "20260402_000002"
down_revision: Union[str, None] = "20260331_000001"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the snapshot archive table and the normalized question table.

    ``tryout_import_snapshots`` stores one row per imported payload;
    ``tryout_snapshot_questions`` keeps the latest normalized view of each
    imported question, pointing back at the most recent snapshot.
    """

    def stamped(name: str) -> sa.Column:
        # Server-maintained audit timestamp column (DB-side now()).
        return sa.Column(
            name,
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        )

    op.create_table(
        "tryout_import_snapshots",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("website_id", sa.Integer(), nullable=False),
        sa.Column("source_tryout_id", sa.String(length=255), nullable=False),
        sa.Column("source_key", sa.String(length=255), nullable=False),
        sa.Column("title", sa.String(length=255), nullable=False),
        sa.Column("source_permalink", sa.String(length=1024), nullable=True),
        sa.Column("source_status", sa.String(length=50), nullable=True),
        sa.Column("exported_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("source_created_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("source_modified_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("exported_by", sa.String(length=255), nullable=True),
        sa.Column("question_count", sa.Integer(), nullable=False),
        sa.Column("result_count", sa.Integer(), nullable=False),
        sa.Column("payload_checksum", sa.String(length=64), nullable=False),
        sa.Column("raw_payload", sa.JSON(), nullable=False),
        stamped("created_at"),
        stamped("updated_at"),
        sa.ForeignKeyConstraint(["website_id"], ["websites.id"], ondelete="CASCADE", onupdate="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    # Lookup indexes used when matching incoming payloads to prior imports.
    for index_name, column in (
        ("ix_tryout_import_snapshots_website_id", "website_id"),
        ("ix_tryout_import_snapshots_source_tryout_id", "source_tryout_id"),
    ):
        op.create_index(index_name, "tryout_import_snapshots", [column], unique=False)

    op.create_table(
        "tryout_snapshot_questions",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("website_id", sa.Integer(), nullable=False),
        sa.Column("source_tryout_id", sa.String(length=255), nullable=False),
        sa.Column("source_question_id", sa.String(length=255), nullable=False),
        sa.Column("latest_snapshot_id", sa.Integer(), nullable=True),
        sa.Column("question_title", sa.Text(), nullable=False),
        sa.Column("question_html", sa.Text(), nullable=False),
        sa.Column("explanation_html", sa.Text(), nullable=True),
        sa.Column("raw_options", sa.JSON(), nullable=False),
        sa.Column("correct_answer", sa.String(length=10), nullable=False),
        sa.Column("category_id", sa.Integer(), nullable=True),
        sa.Column("category_name", sa.String(length=255), nullable=True),
        sa.Column("category_code", sa.String(length=255), nullable=True),
        sa.Column("option_count", sa.Integer(), nullable=False),
        sa.Column("has_option_labels", sa.Boolean(), nullable=False),
        sa.Column("is_active", sa.Boolean(), nullable=False),
        sa.Column("content_checksum", sa.String(length=64), nullable=False),
        sa.Column("raw_payload", sa.JSON(), nullable=False),
        stamped("first_seen_at"),
        stamped("last_seen_at"),
        stamped("created_at"),
        stamped("updated_at"),
        sa.ForeignKeyConstraint(["website_id"], ["websites.id"], ondelete="CASCADE", onupdate="CASCADE"),
        # SET NULL keeps the question row usable even if its snapshot is purged.
        sa.ForeignKeyConstraint(["latest_snapshot_id"], ["tryout_import_snapshots.id"], ondelete="SET NULL", onupdate="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint(
            "website_id",
            "source_tryout_id",
            "source_question_id",
            name="uq_snapshot_questions_website_tryout_question",
        ),
    )
    for index_name, column in (
        ("ix_tryout_snapshot_questions_website_id", "website_id"),
        ("ix_tryout_snapshot_questions_source_tryout_id", "source_tryout_id"),
        ("ix_tryout_snapshot_questions_latest_snapshot_id", "latest_snapshot_id"),
    ):
        op.create_index(index_name, "tryout_snapshot_questions", [column], unique=False)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop snapshot tables; questions first because they reference snapshots."""
    for index_name in (
        "ix_tryout_snapshot_questions_latest_snapshot_id",
        "ix_tryout_snapshot_questions_source_tryout_id",
        "ix_tryout_snapshot_questions_website_id",
    ):
        op.drop_index(index_name, table_name="tryout_snapshot_questions")
    op.drop_table("tryout_snapshot_questions")

    for index_name in (
        "ix_tryout_import_snapshots_source_tryout_id",
        "ix_tryout_import_snapshots_website_id",
    ):
        op.drop_index(index_name, table_name="tryout_import_snapshots")
    op.drop_table("tryout_import_snapshots")
|
||||
@@ -8,6 +8,8 @@ from app.database import Base
|
||||
from app.models.item import Item
|
||||
from app.models.session import Session
|
||||
from app.models.tryout import Tryout
|
||||
from app.models.tryout_import_snapshot import TryoutImportSnapshot
|
||||
from app.models.tryout_snapshot_question import TryoutSnapshotQuestion
|
||||
from app.models.tryout_stats import TryoutStats
|
||||
from app.models.user import User
|
||||
from app.models.user_answer import UserAnswer
|
||||
@@ -18,6 +20,8 @@ __all__ = [
|
||||
"User",
|
||||
"Website",
|
||||
"Tryout",
|
||||
"TryoutImportSnapshot",
|
||||
"TryoutSnapshotQuestion",
|
||||
"Item",
|
||||
"Session",
|
||||
"UserAnswer",
|
||||
|
||||
103
app/models/tryout_import_snapshot.py
Normal file
103
app/models/tryout_import_snapshot.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""
|
||||
Snapshot archive for imported external tryout payloads.
|
||||
|
||||
Stores each imported JSON export so the backend can trace source changes
|
||||
without treating the source file itself as the system of record.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, Integer, JSON, String, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class TryoutImportSnapshot(Base):
    """One archived import of an external tryout JSON payload.

    Each import run appends a new row holding the raw payload plus summary
    metadata; rows are reference data for tracing source changes, not the
    operational item bank.
    """

    __tablename__ = "tryout_import_snapshots"

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    website_id: Mapped[int] = mapped_column(
        ForeignKey("websites.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
        index=True,
        comment="Website identifier",
    )
    source_tryout_id: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
        index=True,
        comment="External source tryout identifier",
    )
    source_key: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
        comment="External tryout object key in source payload",
    )
    title: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
        comment="Imported tryout title",
    )
    source_permalink: Mapped[Optional[str]] = mapped_column(
        String(1024),
        nullable=True,
        comment="Imported source permalink",
    )
    source_status: Mapped[Optional[str]] = mapped_column(
        String(50),
        nullable=True,
        comment="Imported source status",
    )
    exported_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        comment="Timestamp from source export metadata",
    )
    source_created_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        comment="Source tryout created timestamp",
    )
    source_modified_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        comment="Source tryout modified timestamp",
    )
    exported_by: Mapped[Optional[str]] = mapped_column(
        String(255),
        nullable=True,
        comment="Source exporter identity",
    )
    question_count: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=0,
        comment="Number of questions in imported payload",
    )
    result_count: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=0,
        comment="Number of result rows in imported payload",
    )
    payload_checksum: Mapped[str] = mapped_column(
        String(64),
        nullable=False,
        comment="Checksum for the imported payload",
    )
    raw_payload: Mapped[dict] = mapped_column(
        JSON,
        nullable=False,
        comment="Original imported payload",
    )
    # Audit timestamps maintained by the database (server-side now()).
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )
|
||||
139
app/models/tryout_snapshot_question.py
Normal file
139
app/models/tryout_snapshot_question.py
Normal file
@@ -0,0 +1,139 @@
|
||||
"""
|
||||
Read-only normalized reference rows for imported tryout questions.
|
||||
|
||||
These rows reflect the latest imported source version of each question and are
|
||||
kept separate from operational items and AI-generated variants.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, JSON, String, Text, UniqueConstraint, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class TryoutSnapshotQuestion(Base):
    """Latest normalized view of one imported source question.

    Upserted on every import keyed by (website_id, source_tryout_id,
    source_question_id); ``is_active`` flips to False when the question
    disappears from the latest source export.
    """

    __tablename__ = "tryout_snapshot_questions"

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    website_id: Mapped[int] = mapped_column(
        ForeignKey("websites.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
        index=True,
        comment="Website identifier",
    )
    source_tryout_id: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
        index=True,
        comment="External source tryout identifier",
    )
    source_question_id: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
        comment="External source question identifier",
    )
    # Nullable: ON DELETE SET NULL keeps the question if its snapshot is purged.
    latest_snapshot_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("tryout_import_snapshots.id", ondelete="SET NULL", onupdate="CASCADE"),
        nullable=True,
        index=True,
        comment="Latest snapshot containing this question",
    )
    question_title: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="Imported title or short label",
    )
    question_html: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="Imported question body HTML",
    )
    explanation_html: Mapped[Optional[str]] = mapped_column(
        Text,
        nullable=True,
        comment="Imported explanation HTML",
    )
    raw_options: Mapped[list] = mapped_column(
        JSON,
        nullable=False,
        comment="Raw source options payload",
    )
    correct_answer: Mapped[str] = mapped_column(
        String(10),
        nullable=False,
        comment="Imported correct answer key",
    )
    category_id: Mapped[Optional[int]] = mapped_column(
        Integer,
        nullable=True,
        comment="Imported category id",
    )
    category_name: Mapped[Optional[str]] = mapped_column(
        String(255),
        nullable=True,
        comment="Imported category name",
    )
    category_code: Mapped[Optional[str]] = mapped_column(
        String(255),
        nullable=True,
        comment="Imported category code",
    )
    option_count: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=0,
        comment="Count of source options",
    )
    has_option_labels: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
        comment="Whether source options include visible labels",
    )
    is_active: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=True,
        comment="Whether question is still present in latest source import",
    )
    # Used to classify questions as new/updated/unchanged across imports.
    content_checksum: Mapped[str] = mapped_column(
        String(64),
        nullable=False,
        comment="Checksum of normalized question content",
    )
    raw_payload: Mapped[dict] = mapped_column(
        JSON,
        nullable=False,
        comment="Original source question payload",
    )
    first_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
    last_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )

    # One row per question per tryout per website; matches the migration's
    # uq_snapshot_questions_website_tryout_question constraint.
    __table_args__ = (
        UniqueConstraint(
            "website_id",
            "source_tryout_id",
            "source_question_id",
            name="uq_snapshot_questions_website_tryout_question",
        ),
    )
|
||||
@@ -1,14 +1,17 @@
|
||||
"""
|
||||
Import/Export API router for Excel question migration.
|
||||
Import/Export API router for migration and snapshot ingestion.
|
||||
|
||||
Endpoints:
|
||||
- POST /api/v1/import/preview: Preview Excel import without saving
|
||||
- POST /api/v1/import/questions: Import questions from Excel to database
|
||||
- GET /api/v1/export/questions: Export questions to Excel file
|
||||
- POST /api/v1/import-export/tryout-json/preview: Preview Sejoli tryout JSON import
|
||||
- POST /api/v1/import-export/tryout-json: Import Sejoli tryout JSON as read-only snapshot
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import json
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, File, Form, Header, HTTPException, UploadFile, status
|
||||
@@ -16,12 +19,18 @@ from fastapi.responses import FileResponse
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.database import get_db
|
||||
from app.models import Website
|
||||
from app.services.excel_import import (
|
||||
bulk_insert_items,
|
||||
export_questions_to_excel,
|
||||
parse_excel_import,
|
||||
validate_excel_structure,
|
||||
)
|
||||
from app.services.tryout_json_import import (
|
||||
TryoutImportError,
|
||||
import_tryout_json_snapshot,
|
||||
preview_tryout_json_import,
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/import-export", tags=["import-export"])
|
||||
|
||||
@@ -55,6 +64,21 @@ def get_website_id_from_header(
|
||||
)
|
||||
|
||||
|
||||
async def ensure_website_exists(
    website_id: int,
    db: AsyncSession,
) -> None:
    """Raise HTTP 404 when *website_id* has no registered ``Website`` row."""
    if await db.get(Website, website_id) is not None:
        return
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Website {website_id} not found. Website registration is stored in the database, not in .env.",
    )
|
||||
|
||||
|
||||
@router.post(
|
||||
"/preview",
|
||||
summary="Preview Excel import",
|
||||
@@ -322,3 +346,73 @@ async def export_questions(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Export failed: {str(e)}",
|
||||
)
|
||||
|
||||
|
||||
@router.post(
    "/tryout-json/preview",
    summary="Preview Sejoli tryout JSON import",
    description="Parse a Sejoli tryout export JSON file and show snapshot diff without writing to database.",
)
async def preview_tryout_json(
    file: UploadFile = File(..., description="Sejoli tryout export JSON"),
    website_id: int = Depends(get_website_id_from_header),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Parse an uploaded Sejoli export and return a snapshot diff preview.

    Writes nothing to the database; returns the dict produced by
    ``preview_tryout_json_import``. Raises HTTP 400 for a non-.json filename,
    undecodable/invalid JSON, or a structurally invalid payload, and HTTP 404
    for an unknown website.
    """
    if not file.filename or not file.filename.lower().endswith(".json"):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="File must be .json format",
        )

    await ensure_website_exists(website_id, db)

    try:
        # A non-UTF-8 upload previously escaped as an unhandled
        # UnicodeDecodeError (HTTP 500); treat it as a client error
        # just like malformed JSON.
        payload = json.loads((await file.read()).decode("utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid JSON file: {str(e)}",
        ) from e

    try:
        return await preview_tryout_json_import(payload, website_id, db)
    except TryoutImportError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e),
        ) from e
|
||||
|
||||
|
||||
@router.post(
    "/tryout-json",
    summary="Import Sejoli tryout JSON snapshot",
    description="Store Sejoli tryout export JSON as read-only snapshot data and upsert normalized reference questions.",
)
async def import_tryout_json(
    file: UploadFile = File(..., description="Sejoli tryout export JSON"),
    website_id: int = Depends(get_website_id_from_header),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Store an uploaded Sejoli export as a snapshot and upsert questions.

    Delegates persistence to ``import_tryout_json_snapshot``. Raises HTTP 400
    for a non-.json filename, undecodable/invalid JSON, or a structurally
    invalid payload, and HTTP 404 for an unknown website.
    """
    if not file.filename or not file.filename.lower().endswith(".json"):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="File must be .json format",
        )

    await ensure_website_exists(website_id, db)

    try:
        # A non-UTF-8 upload previously escaped as an unhandled
        # UnicodeDecodeError (HTTP 500); treat it as a client error
        # just like malformed JSON.
        payload = json.loads((await file.read()).decode("utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid JSON file: {str(e)}",
        ) from e

    try:
        return await import_tryout_json_snapshot(payload, website_id, db)
    except TryoutImportError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e),
        ) from e
|
||||
|
||||
341
app/services/tryout_json_import.py
Normal file
341
app/services/tryout_json_import.py
Normal file
@@ -0,0 +1,341 @@
|
||||
"""
|
||||
Importer for Sejoli tryout JSON snapshot payloads.
|
||||
|
||||
This importer stores snapshots as read-only reference data. It does not create
|
||||
or overwrite operational items, because the exported JSON does not currently
|
||||
contain the full option text needed for the live item bank.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.models import TryoutImportSnapshot, TryoutSnapshotQuestion, Website
|
||||
|
||||
# Format identifier echoed back to API clients in preview/import responses.
SOURCE_FORMAT = "sejoli_json"
# Timestamp layout used by the source export (values are treated as UTC).
DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
|
||||
|
||||
|
||||
# Subclasses ValueError; the API routers translate it into HTTP 400.
class TryoutImportError(ValueError):
    """Raised when the incoming payload is structurally invalid."""
|
||||
|
||||
|
||||
@dataclass
class QuestionDiffSummary:
    """Per-tryout counts comparing an incoming payload against stored rows."""

    total_questions: int        # questions present in the incoming payload
    new_questions: int          # not yet stored for this website/tryout
    updated_questions: int      # stored, but content checksum differs
    unchanged_questions: int    # stored with identical content checksum
    removed_questions: int      # active stored rows absent from the payload
    missing_option_labels: int  # incoming questions without visible option labels
|
||||
|
||||
|
||||
@dataclass
class TryoutPreview:
    """Preview summary for a single tryout inside the export payload."""

    source_tryout_id: str  # external tryout id (falls back to the payload key)
    source_key: str        # key of this tryout object within payload["tryouts"]
    title: str             # tryout title (falls back to the payload key)
    permalink: str | None  # source permalink, if present in "info"
    question_diff: QuestionDiffSummary
    warnings: list[str]    # human-readable import caveats
|
||||
|
||||
|
||||
def _parse_datetime(value: str | None) -> datetime | None:
    """Parse a source ``YYYY-MM-DD HH:MM:SS`` string into an aware UTC datetime.

    Returns None for a missing/empty value. A malformed timestamp previously
    leaked a bare ValueError (surfacing as HTTP 500); it is now wrapped in
    TryoutImportError so the routers report it as HTTP 400 like every other
    payload-validation failure.
    """
    if not value:
        return None
    try:
        parsed = datetime.strptime(value, DATETIME_FORMAT)
    except ValueError as exc:
        raise TryoutImportError(f"Invalid datetime value in payload: {value!r}") from exc
    # Source timestamps carry no zone information; interpret them as UTC.
    return parsed.replace(tzinfo=timezone.utc)
|
||||
|
||||
|
||||
def _sha256(value: Any) -> str:
|
||||
payload = json.dumps(value, sort_keys=True, ensure_ascii=False)
|
||||
return hashlib.sha256(payload.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def _validate_root(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
if not isinstance(payload, dict):
|
||||
raise TryoutImportError("Payload must be a JSON object.")
|
||||
if "tryouts" not in payload or not isinstance(payload["tryouts"], dict) or not payload["tryouts"]:
|
||||
raise TryoutImportError("Payload must contain a non-empty 'tryouts' object.")
|
||||
return payload
|
||||
|
||||
|
||||
def _extract_tryout_previews(payload: dict[str, Any]) -> list[tuple[str, dict[str, Any]]]:
|
||||
return list(payload["tryouts"].items())
|
||||
|
||||
|
||||
def _normalize_question(question: dict[str, Any]) -> dict[str, Any]:
    """Project a raw source question onto the snapshot schema and checksum it.

    The returned dict keeps the untouched source payload/options alongside
    cleaned scalar fields, plus a content checksum covering the fields that
    matter for change detection.
    """

    def cleaned(key: str) -> str:
        # Coerce missing/None values to "" before stripping.
        return str(question.get(key) or "").strip()

    raw_options = question.get("options") or []
    labelled = any(
        bool((opt or {}).get("label"))
        for opt in raw_options
        if isinstance(opt, dict)
    )

    normalized = {
        "source_question_id": str(question.get("id", "")),
        "title": cleaned("title"),
        "question": cleaned("question"),
        "explanation": cleaned("explanation") or None,
        "correct_answer": cleaned("answer").upper(),
        "category_id": question.get("category_id"),
        "category_name": cleaned("category_name") or None,
        "category_code": cleaned("category_code") or None,
        "raw_options": raw_options,
        "option_count": len(raw_options),
        "has_option_labels": labelled,
        "raw_payload": question,
    }
    # Checksum only the content-bearing fields (not counts/flags derived
    # from them), so cosmetic payload differences don't mark questions dirty.
    checksum_fields = (
        "title",
        "question",
        "explanation",
        "correct_answer",
        "category_id",
        "category_name",
        "category_code",
        "raw_options",
    )
    normalized["content_checksum"] = _sha256({key: normalized[key] for key in checksum_fields})
    return normalized
|
||||
|
||||
|
||||
async def ensure_website_exists(db: AsyncSession, website_id: int) -> Website:
    """Return the ``Website`` row for *website_id*, or raise TryoutImportError."""
    result = await db.execute(select(Website).where(Website.id == website_id))
    website = result.scalar_one_or_none()
    if website is not None:
        return website
    raise TryoutImportError(
        f"Website {website_id} not found. Register the website in the backend first; this is not configured via .env."
    )
|
||||
|
||||
|
||||
async def preview_tryout_json_import(payload: dict[str, Any], website_id: int, db: AsyncSession) -> dict[str, Any]:
    """Diff an export payload against stored snapshot questions, read-only.

    Validates the payload and website, then for each tryout classifies every
    incoming question as new/updated/unchanged against the stored rows and
    counts active stored rows missing from the payload as removed. Writes
    nothing; returns a JSON-serializable summary dict.

    Raises:
        TryoutImportError: invalid payload structure or unknown website.
    """
    _validate_root(payload)
    await ensure_website_exists(db, website_id)

    tryout_previews: list[TryoutPreview] = []
    total_new = total_updated = total_unchanged = total_removed = total_missing_labels = 0

    for source_key, tryout_payload in _extract_tryout_previews(payload):
        info = tryout_payload.get("info") or {}
        # Fall back to the payload key when the export lacks id/title.
        source_tryout_id = str(info.get("id") or source_key)
        title = str(info.get("title") or source_key)
        questions = tryout_payload.get("questions") or []
        normalized_questions = [_normalize_question(q) for q in questions]

        # One query per tryout: all stored question rows for this website/tryout.
        existing_result = await db.execute(
            select(TryoutSnapshotQuestion).where(
                TryoutSnapshotQuestion.website_id == website_id,
                TryoutSnapshotQuestion.source_tryout_id == source_tryout_id,
            )
        )
        existing_questions = {
            row.source_question_id: row
            for row in existing_result.scalars().all()
        }

        new_questions = updated_questions = unchanged_questions = 0
        missing_option_labels = 0
        incoming_ids: set[str] = set()

        for question in normalized_questions:
            incoming_ids.add(question["source_question_id"])
            existing = existing_questions.get(question["source_question_id"])
            if question["has_option_labels"] is False:
                missing_option_labels += 1
            # Classification mirrors import_tryout_json_snapshot: presence
            # first, then content checksum.
            if existing is None:
                new_questions += 1
            elif existing.content_checksum != question["content_checksum"]:
                updated_questions += 1
            else:
                unchanged_questions += 1

        # Active stored rows not present in this payload would be deactivated
        # by a real import.
        removed_questions = sum(1 for question_id, row in existing_questions.items() if row.is_active and question_id not in incoming_ids)

        warnings: list[str] = []
        if missing_option_labels:
            warnings.append(
                f"{missing_option_labels} question(s) have no visible option labels in the export; import will store raw reference data only."
            )

        summary = QuestionDiffSummary(
            total_questions=len(normalized_questions),
            new_questions=new_questions,
            updated_questions=updated_questions,
            unchanged_questions=unchanged_questions,
            removed_questions=removed_questions,
            missing_option_labels=missing_option_labels,
        )

        total_new += new_questions
        total_updated += updated_questions
        total_unchanged += unchanged_questions
        total_removed += removed_questions
        total_missing_labels += missing_option_labels

        tryout_previews.append(
            TryoutPreview(
                source_tryout_id=source_tryout_id,
                source_key=source_key,
                title=title,
                permalink=info.get("permalink"),
                question_diff=summary,
                warnings=warnings,
            )
        )

    return {
        "source_format": SOURCE_FORMAT,
        "tryout_count": len(tryout_previews),
        "totals": {
            "new_questions": total_new,
            "updated_questions": total_updated,
            "unchanged_questions": total_unchanged,
            "removed_questions": total_removed,
            "missing_option_labels": total_missing_labels,
        },
        "tryouts": [
            {
                "source_tryout_id": preview.source_tryout_id,
                "source_key": preview.source_key,
                "title": preview.title,
                "permalink": preview.permalink,
                # __dict__ flattens the dataclass for JSON serialization.
                "question_diff": preview.question_diff.__dict__,
                "warnings": preview.warnings,
            }
            for preview in tryout_previews
        ],
    }
|
||||
|
||||
|
||||
async def import_tryout_json_snapshot(payload: dict[str, Any], website_id: int, db: AsyncSession) -> dict[str, Any]:
    """Persist an export payload as snapshot rows plus upserted questions.

    For each tryout: append a ``TryoutImportSnapshot`` row with the raw
    payload, then upsert ``TryoutSnapshotQuestion`` rows by source question
    id (insert new, rewrite changed content, touch unchanged) and deactivate
    active rows missing from the payload. Flushes but does not commit —
    presumably the session dependency handles commit/rollback; confirm.

    Raises:
        TryoutImportError: invalid payload structure or unknown website
            (raised by the preview step, before any rows are added).

    NOTE(review): the preview call below re-runs validation and the
    per-tryout question queries; the import loop queries them again.
    """
    preview = await preview_tryout_json_import(payload, website_id, db)
    export_info = payload.get("export_info") or {}

    imported_tryouts: list[dict[str, Any]] = []

    for source_key, tryout_payload in _extract_tryout_previews(payload):
        info = tryout_payload.get("info") or {}
        # Fall back to the payload key when the export lacks id/title.
        source_tryout_id = str(info.get("id") or source_key)
        title = str(info.get("title") or source_key)
        questions = tryout_payload.get("questions") or []
        results = tryout_payload.get("results") or []
        normalized_questions = [_normalize_question(q) for q in questions]

        snapshot = TryoutImportSnapshot(
            website_id=website_id,
            source_tryout_id=source_tryout_id,
            source_key=source_key,
            title=title,
            source_permalink=info.get("permalink"),
            source_status=info.get("status"),
            exported_at=_parse_datetime(export_info.get("exported_at")),
            source_created_at=_parse_datetime(info.get("created_date")),
            source_modified_at=_parse_datetime(info.get("modified_date")),
            exported_by=export_info.get("exported_by"),
            question_count=len(questions),
            result_count=len(results),
            payload_checksum=_sha256(tryout_payload),
            raw_payload=tryout_payload,
        )
        db.add(snapshot)
        # Flush so snapshot.id is assigned before linking question rows to it.
        await db.flush()

        existing_result = await db.execute(
            select(TryoutSnapshotQuestion).where(
                TryoutSnapshotQuestion.website_id == website_id,
                TryoutSnapshotQuestion.source_tryout_id == source_tryout_id,
            )
        )
        existing_questions = {
            row.source_question_id: row
            for row in existing_result.scalars().all()
        }

        # Single timestamp for every row touched by this import pass.
        now = datetime.now(timezone.utc)
        incoming_ids: set[str] = set()
        new_questions = updated_questions = unchanged_questions = 0

        for question in normalized_questions:
            source_question_id = question["source_question_id"]
            incoming_ids.add(source_question_id)
            existing = existing_questions.get(source_question_id)
            if existing is None:
                row = TryoutSnapshotQuestion(
                    website_id=website_id,
                    source_tryout_id=source_tryout_id,
                    source_question_id=source_question_id,
                    latest_snapshot_id=snapshot.id,
                    # Fall back to the body when the source has no title.
                    question_title=question["title"] or question["question"],
                    question_html=question["question"],
                    explanation_html=question["explanation"],
                    raw_options=question["raw_options"],
                    correct_answer=question["correct_answer"],
                    category_id=question["category_id"],
                    category_name=question["category_name"],
                    category_code=question["category_code"],
                    option_count=question["option_count"],
                    has_option_labels=question["has_option_labels"],
                    is_active=True,
                    content_checksum=question["content_checksum"],
                    raw_payload=question["raw_payload"],
                    last_seen_at=now,
                )
                db.add(row)
                new_questions += 1
                continue

            # Rewrite content only when the checksum says it changed.
            if existing.content_checksum != question["content_checksum"]:
                existing.question_title = question["title"] or question["question"]
                existing.question_html = question["question"]
                existing.explanation_html = question["explanation"]
                existing.raw_options = question["raw_options"]
                existing.correct_answer = question["correct_answer"]
                existing.category_id = question["category_id"]
                existing.category_name = question["category_name"]
                existing.category_code = question["category_code"]
                existing.option_count = question["option_count"]
                existing.has_option_labels = question["has_option_labels"]
                existing.content_checksum = question["content_checksum"]
                existing.raw_payload = question["raw_payload"]
                updated_questions += 1
            else:
                unchanged_questions += 1

            # Touch every surviving row, changed or not.
            existing.latest_snapshot_id = snapshot.id
            existing.is_active = True
            existing.last_seen_at = now

        # Deactivate active rows that vanished from this export.
        removed_questions = 0
        for source_question_id, existing in existing_questions.items():
            if existing.is_active and source_question_id not in incoming_ids:
                existing.is_active = False
                existing.latest_snapshot_id = snapshot.id
                existing.last_seen_at = now
                removed_questions += 1

        imported_tryouts.append(
            {
                "snapshot_id": snapshot.id,
                "source_tryout_id": source_tryout_id,
                "title": title,
                "new_questions": new_questions,
                "updated_questions": updated_questions,
                "unchanged_questions": unchanged_questions,
                "removed_questions": removed_questions,
                "question_count": len(normalized_questions),
            }
        )

    await db.flush()

    return {
        "source_format": SOURCE_FORMAT,
        "website_id": website_id,
        "preview": preview,
        "imported_tryouts": imported_tryouts,
        "message": "Tryout JSON snapshot imported as read-only reference data.",
    }
|
||||
110
tests/test_tryout_json_import.py
Normal file
110
tests/test_tryout_json_import.py
Normal file
@@ -0,0 +1,110 @@
|
||||
import asyncio
|
||||
from types import SimpleNamespace
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
||||
|
||||
from app.services.tryout_json_import import preview_tryout_json_import
|
||||
|
||||
|
||||
class DummyScalarResult:
    """Stub result whose ``scalar_one_or_none()`` returns a fixed value."""

    def __init__(self, value):
        self._scalar = value

    def scalar_one_or_none(self):
        return self._scalar
|
||||
|
||||
|
||||
class DummyScalars:
    """Stub scalars view whose ``all()`` returns the rows it was given."""

    def __init__(self, values):
        self._rows = values

    def all(self):
        return self._rows
|
||||
|
||||
|
||||
class DummyListResult:
    """Stub query result; ``.scalars().all()`` yields the provided rows."""

    def __init__(self, values):
        self._rows = values

    def scalars(self):
        return DummyScalars(self._rows)
|
||||
|
||||
|
||||
class DummySession:
    """Minimal async session stub returning queued responses in FIFO order."""

    def __init__(self, responses):
        # Copy so popping does not mutate the caller's list.
        self._queue = list(responses)

    async def execute(self, _query):
        return self._queue.pop(0)
|
||||
|
||||
|
||||
def test_preview_tryout_json_import_classifies_new_updated_and_removed_questions():
    """Preview classifies questions as new/updated/removed and counts label gaps.

    Payload has two questions: id 269 exists in the DB with a stale checksum
    (-> updated) and id 270 is unknown (-> new). The DB also holds an active
    id 999 absent from the payload (-> removed). Neither incoming question
    has option labels, so missing_option_labels is 2.
    """
    payload = {
        "export_info": {
            "exported_at": "2026-04-02 09:12:59",
            "exported_by": "Admin",
            "tryout_id": 1038,
        },
        "tryouts": {
            "tryout_1038": {
                "info": {
                    "id": 1038,
                    "title": "Tryout PPDS Obgyn",
                    "permalink": "https://member.example.com/tryout/1038",
                },
                "questions": [
                    {
                        "id": 269,
                        "title": "Question A",
                        "question": "<p>Question A body</p>",
                        "options": [
                            {"increment": "A", "label": "", "value": "0"},
                            {"increment": "B", "label": "", "value": "1"},
                        ],
                        "answer": "B",
                        "explanation": "<p>Because.</p>",
                    },
                    {
                        "id": 270,
                        "title": "Question B new",
                        "question": "<p>Question B body</p>",
                        "options": [
                            {"increment": "A", "label": "", "value": "1"},
                            {"increment": "B", "label": "", "value": "0"},
                        ],
                        "answer": "A",
                        "explanation": "<p>New item.</p>",
                    },
                ],
                "results": [],
            }
        },
    }

    # Stored row for id 269 with a checksum that cannot match -> "updated".
    existing_question = SimpleNamespace(
        source_question_id="269",
        content_checksum="old-checksum",
        is_active=True,
    )
    # Active stored row absent from the payload -> "removed".
    removed_question = SimpleNamespace(
        source_question_id="999",
        content_checksum="removed-checksum",
        is_active=True,
    )
    # Responses in the order the service queries: website lookup, then the
    # existing-question list for the single tryout.
    db = DummySession(
        [
            DummyScalarResult(SimpleNamespace(id=1)),
            DummyListResult([existing_question, removed_question]),
        ]
    )

    preview = asyncio.run(preview_tryout_json_import(payload, website_id=1, db=db))

    assert preview["tryout_count"] == 1
    assert preview["totals"]["new_questions"] == 1
    assert preview["totals"]["updated_questions"] == 1
    assert preview["totals"]["removed_questions"] == 1
    assert preview["totals"]["missing_option_labels"] == 2
    # Guard: the import-result message must not leak into preview output.
    assert "read-only reference data" not in str(preview)
|
||||
Reference in New Issue
Block a user