feat(project): refactor ProjectManager to persist via SQLAlchemy + StorageService
Replace file-based JSON persistence with a SQLAlchemy database (ProjectModel/ProjectFileModel) for project metadata and StorageService for file content; remove the Project dataclass; add 7 passing tests.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
1f43d35d59
commit
2478beb9ea
|
|
@ -3,7 +3,7 @@ Data models module
|
|||
"""
|
||||
|
||||
from .task import TaskManager, TaskStatus
|
||||
from .project import Project, ProjectStatus, ProjectManager
|
||||
from .project import ProjectStatus, ProjectManager
|
||||
|
||||
__all__ = ['TaskManager', 'TaskStatus', 'Project', 'ProjectStatus', 'ProjectManager']
|
||||
__all__ = ['TaskManager', 'TaskStatus', 'ProjectStatus', 'ProjectManager']
|
||||
|
||||
|
|
|
|||
|
|
@ -1,310 +1,173 @@
|
|||
"""
|
||||
Project context management
|
||||
Persists project state server-side so the frontend does not need to pass large amounts of data between endpoints.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
"""Project context management — persistent via SQLAlchemy + StorageService."""
|
||||
import uuid
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
import io
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, Any, List, Optional
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from ..config import Config
|
||||
|
||||
from ..db import get_session
|
||||
from ..models.db_models import ProjectModel, ProjectFileModel
|
||||
|
||||
|
||||
class ProjectStatus(str, Enum):
    """Lifecycle stage of a project; each member's value is its serialized string."""

    # The project exists and its source files have been uploaded.
    CREATED = "created"
    # Ontology generation has finished.
    ONTOLOGY_GENERATED = "ontology_generated"
    # A graph build is currently in progress.
    GRAPH_BUILDING = "graph_building"
    # The graph build completed.
    GRAPH_COMPLETED = "graph_completed"
    # Processing failed.
    FAILED = "failed"
|
||||
|
||||
|
||||
@dataclass
class Project:
    """Serializable record describing one project and its processing state."""

    project_id: str
    name: str
    status: ProjectStatus
    created_at: str
    updated_at: str

    # Uploaded files: [{filename, path, size}]
    files: List[Dict[str, str]] = field(default_factory=list)
    total_text_length: int = 0

    # Ontology results (filled in after endpoint 1)
    ontology: Optional[Dict[str, Any]] = None
    analysis_summary: Optional[str] = None

    # Graph results (filled in once endpoint 2 completes)
    graph_id: Optional[str] = None
    graph_build_task_id: Optional[str] = None

    # Processing configuration
    simulation_requirement: Optional[str] = None
    chunk_size: int = 500
    chunk_overlap: int = 50

    # Last error, if any
    error: Optional[str] = None

    # Kept on the record so the frontend can reconnect after a page refresh
    active_task_id: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the project as a plain dictionary.

        ``status`` is emitted as its string value when it is a
        ``ProjectStatus`` member and passed through unchanged otherwise.
        """
        status = self.status
        if isinstance(status, ProjectStatus):
            status = status.value

        payload = {
            "project_id": self.project_id,
            "name": self.name,
            "status": status,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "files": self.files,
            "total_text_length": self.total_text_length,
            "ontology": self.ontology,
            "analysis_summary": self.analysis_summary,
            "graph_id": self.graph_id,
            "graph_build_task_id": self.graph_build_task_id,
            "simulation_requirement": self.simulation_requirement,
            "chunk_size": self.chunk_size,
            "chunk_overlap": self.chunk_overlap,
            "error": self.error,
            "active_task_id": self.active_task_id,
        }
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Project':
        """Build a ``Project`` from a dictionary.

        ``project_id`` is required (``KeyError`` when absent); every other
        key falls back to a default. A string ``status`` is converted to
        ``ProjectStatus``, which raises ``ValueError`` for an unknown value.
        """
        raw_status = data.get('status', 'created')
        status = ProjectStatus(raw_status) if isinstance(raw_status, str) else raw_status

        lookup = data.get
        return cls(
            project_id=data['project_id'],
            name=lookup('name', 'Unnamed Project'),
            status=status,
            created_at=lookup('created_at', ''),
            updated_at=lookup('updated_at', ''),
            files=lookup('files', []),
            total_text_length=lookup('total_text_length', 0),
            ontology=lookup('ontology'),
            analysis_summary=lookup('analysis_summary'),
            graph_id=lookup('graph_id'),
            graph_build_task_id=lookup('graph_build_task_id'),
            simulation_requirement=lookup('simulation_requirement'),
            chunk_size=lookup('chunk_size', 500),
            chunk_overlap=lookup('chunk_overlap', 50),
            error=lookup('error'),
            active_task_id=lookup('active_task_id'),
        )
|
||||
CREATED = "created"
|
||||
ONTOLOGY_GENERATED = "ontology_generated"
|
||||
GRAPH_BUILDING = "graph_building"
|
||||
GRAPH_COMPLETED = "graph_completed"
|
||||
FAILED = "failed"
|
||||
|
||||
|
||||
class ProjectManager:
|
||||
"""Project manager - handles persistent storage and retrieval of projects"""
|
||||
|
||||
# Root directory for project storage
|
||||
PROJECTS_DIR = os.path.join(Config.UPLOAD_FOLDER, 'projects')
|
||||
"""Gestiona projectes: metadades a BD, fitxers a StorageService."""
|
||||
|
||||
@classmethod
|
||||
def _ensure_projects_dir(cls):
    """Create the root projects directory if it is not already present."""
    root = cls.PROJECTS_DIR
    os.makedirs(root, exist_ok=True)
|
||||
def create_project(cls, name: str = "Unnamed Project", storage=None) -> Dict[str, Any]:
    """Insert a new project row and return it as a plain dictionary.

    Args:
        name: Display name stored on the project.
        storage: Not used by this method.

    Returns:
        The dictionary produced by ``_to_dict`` for the new row.
    """
    new_id = str(uuid.uuid4())
    with get_session() as session:
        record = ProjectModel(id=new_id, name=name, status="created")
        session.add(record)
        session.commit()
        # Reload the row after the commit, before serializing it.
        session.refresh(record)
        return cls._to_dict(record)
|
||||
|
||||
@classmethod
|
||||
def _get_project_dir(cls, project_id: str) -> str:
    """Return the directory path for ``project_id`` under ``PROJECTS_DIR``."""
    root = cls.PROJECTS_DIR
    return os.path.join(root, project_id)
|
||||
def get_project(cls, project_id: str) -> Optional[Dict[str, Any]]:
    """Look up a project by primary key.

    Returns:
        The project as a dictionary, or ``None`` when no row has that id.
    """
    with get_session() as session:
        record = session.get(ProjectModel, project_id)
        return None if record is None else cls._to_dict(record)
|
||||
|
||||
@classmethod
|
||||
def _get_project_meta_path(cls, project_id: str) -> str:
    """Return the path of the project's ``project.json`` metadata file."""
    project_dir = cls._get_project_dir(project_id)
    return os.path.join(project_dir, 'project.json')
|
||||
def save_project(cls, project_data: Dict[str, Any]) -> None:
    """Update the stored fields of an existing project.

    Args:
        project_data: Mapping that carries the project identifier under
            ``"id"`` or ``"project_id"`` plus any of the updatable fields.
            Keys outside the updatable set are ignored.

    Returns:
        None. Nothing is written when the identifier is missing or when no
        project with that identifier exists.
    """
    project_id = project_data.get("id") or project_data.get("project_id")
    if not project_id:
        # No identifier means nothing to look up; avoid querying with a
        # NULL primary key.
        return

    updatable = (
        "name", "status", "analysis_summary", "simulation_requirement",
        "chunk_size", "chunk_overlap", "active_task_id",
    )
    with get_session() as db:
        proj = db.get(ProjectModel, project_id)
        if proj is None:
            return
        for key in updatable:
            if key not in project_data:
                continue
            value = project_data[key]
            # Callers may pass enum members (e.g. ProjectStatus); persist
            # the plain underlying value.
            if isinstance(value, Enum):
                value = value.value
            setattr(proj, key, value)
        proj.updated_at = datetime.now(timezone.utc)
        db.commit()
|
||||
|
||||
@classmethod
|
||||
def _get_project_files_dir(cls, project_id: str) -> str:
    """Return the path of the ``files`` subdirectory inside the project directory."""
    project_dir = cls._get_project_dir(project_id)
    return os.path.join(project_dir, 'files')
|
||||
def list_projects(cls, limit: int = 50) -> List[Dict[str, Any]]:
    """Return up to ``limit`` projects as dictionaries, newest ``created_at`` first."""
    from sqlalchemy import select, desc

    with get_session() as session:
        newest_first = desc(ProjectModel.created_at)
        query = select(ProjectModel).order_by(newest_first).limit(limit)
        rows = session.execute(query).scalars().all()
        return list(map(cls._to_dict, rows))
|
||||
|
||||
@classmethod
|
||||
def _get_project_text_path(cls, project_id: str) -> str:
    """Return the path of the file that holds the project's extracted text."""
    project_dir = cls._get_project_dir(project_id)
    return os.path.join(project_dir, 'extracted_text.txt')
|
||||
|
||||
@classmethod
|
||||
def create_project(cls, name: str = "Unnamed Project") -> Project:
    """
    Create a project on disk and return it.

    Args:
        name: project name

    Returns:
        the new Project, already written to its metadata file
    """
    cls._ensure_projects_dir()

    new_id = f"proj_{uuid.uuid4().hex[:12]}"
    timestamp = datetime.now().isoformat()

    created = Project(
        project_id=new_id,
        name=name,
        status=ProjectStatus.CREATED,
        created_at=timestamp,
        updated_at=timestamp
    )

    # Lay out the project directory and its files/ subdirectory
    layout = (cls._get_project_dir(new_id), cls._get_project_files_dir(new_id))
    for directory in layout:
        os.makedirs(directory, exist_ok=True)

    # Persist the metadata
    cls.save_project(created)

    return created
|
||||
|
||||
@classmethod
|
||||
def save_project(cls, project: Project) -> None:
    """Write the project's metadata to its JSON file.

    ``project.updated_at`` is set to the current local time (ISO format)
    before the project is serialized.

    Args:
        project: The project to persist; its ``project_id`` selects the file.
    """
    project.updated_at = datetime.now().isoformat()
    meta_path = cls._get_project_meta_path(project.project_id)

    # Serialize before opening the file: opening in 'w' mode truncates it,
    # so a serialization error would otherwise wipe the existing metadata.
    serialized = json.dumps(project.to_dict(), ensure_ascii=False, indent=2)

    with open(meta_path, 'w', encoding='utf-8') as f:
        f.write(serialized)
|
||||
|
||||
@classmethod
|
||||
def get_project(cls, project_id: str) -> Optional[Project]:
    """
    Load a project from its metadata file.

    Args:
        project_id: project ID

    Returns:
        the Project, or None when the metadata file does not exist
    """
    meta_path = cls._get_project_meta_path(project_id)

    if os.path.exists(meta_path):
        with open(meta_path, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
        return Project.from_dict(payload)

    return None
|
||||
|
||||
@classmethod
|
||||
def list_projects(cls, limit: int = 50) -> List[Project]:
    """
    List stored projects, newest first.

    Args:
        limit: maximum number of projects returned

    Returns:
        projects ordered by creation time, descending, cut to ``limit``
    """
    cls._ensure_projects_dir()

    # Every entry of the projects directory is tried as a project id;
    # entries that do not load are dropped.
    loaded = (cls.get_project(entry) for entry in os.listdir(cls.PROJECTS_DIR))
    found = [candidate for candidate in loaded if candidate]

    ordered = sorted(found, key=lambda p: p.created_at, reverse=True)
    return ordered[:limit]
|
||||
|
||||
@classmethod
|
||||
def delete_project(cls, project_id: str) -> bool:
    """
    Delete a project's directory and everything inside it.

    Args:
        project_id: project ID

    Returns:
        True when the directory existed and was removed, False when there
        was no such directory
    """
    project_dir = cls._get_project_dir(project_id)

    if not os.path.exists(project_dir):
        return False

    shutil.rmtree(project_dir)
    # Report success explicitly; falling off the end would return None
    # even though the method is declared to return bool.
    return True
|
||||
def delete_project(cls, project_id: str, storage=None) -> bool:
    """Delete a project row and, when a storage backend is given, its stored files.

    Args:
        project_id: Primary key of the project to remove.
        storage: Optional backend; when provided, ``delete_prefix`` is called
            for ``projects/<project_id>`` before the row is deleted.

    Returns:
        True when the project existed and was deleted, False otherwise.
    """
    with get_session() as session:
        record = session.get(ProjectModel, project_id)
        if record is None:
            return False

        if storage is not None:
            prefix = f"projects/{project_id}"
            storage.delete_prefix(prefix)

        session.delete(record)
        session.commit()
        return True
|
||||
|
||||
|
||||
@classmethod
|
||||
def save_file_to_project(
    cls,
    project_id: str,
    file_storage,  # Flask FileStorage
    original_filename: str,
    storage,
) -> Dict[str, Any]:
    """Store an uploaded file through ``storage`` and record it in the database.

    Args:
        project_id: Project the file belongs to.
        file_storage: Uploaded file object. Its ``read()`` result is the
            content; its ``content_type`` attribute, when present and
            non-empty, is recorded as the MIME type.
        original_filename: Name supplied with the upload. Only its extension
            is reused; the stored name is randomly generated.
        storage: Storage backend providing ``upload``.

    Returns:
        Dict with ``original_filename``, ``saved_filename``,
        ``storage_path`` and ``size`` (number of bytes read).
    """
    import os

    # Generate a safe filename: random stem plus the lower-cased extension.
    ext = os.path.splitext(original_filename)[1].lower()
    safe_filename = f"{uuid.uuid4().hex[:8]}{ext}"
    storage_path = f"projects/{project_id}/files/{safe_filename}"

    data = file_storage.read()
    mime_type = getattr(file_storage, "content_type", "application/octet-stream") or "application/octet-stream"

    # Pass the content type as the third argument, as save_extracted_text does.
    storage.upload(storage_path, data, mime_type)

    with get_session() as db:
        file_rec = ProjectFileModel(
            id=str(uuid.uuid4()),
            project_id=project_id,
            original_name=original_filename,
            storage_path=storage_path,
            size=len(data),
            mime_type=mime_type,
            file_type="upload",
        )
        db.add(file_rec)
        db.commit()

    return {
        "original_filename": original_filename,
        "saved_filename": safe_filename,
        "storage_path": storage_path,
        "size": len(data),
    }
|
||||
|
||||
@classmethod
|
||||
def save_extracted_text(cls, project_id: str, text: str) -> None:
    """Write ``text`` to the project's extracted-text file as UTF-8."""
    destination = cls._get_project_text_path(project_id)
    with open(destination, 'w', encoding='utf-8') as handle:
        handle.write(text)
|
||||
|
||||
@classmethod
|
||||
def get_extracted_text(cls, project_id: str) -> Optional[str]:
    """Return the project's extracted text, or None when the file does not exist."""
    source = cls._get_project_text_path(project_id)

    if os.path.exists(source):
        with open(source, 'r', encoding='utf-8') as handle:
            return handle.read()

    return None
|
||||
|
||||
@classmethod
|
||||
def get_project_files(cls, project_id: str) -> List[str]:
    """Return the paths of the regular files in the project's files directory.

    An empty list is returned when that directory does not exist.
    """
    files_dir = cls._get_project_files_dir(project_id)

    if not os.path.exists(files_dir):
        return []

    candidates = (os.path.join(files_dir, entry) for entry in os.listdir(files_dir))
    return [path for path in candidates if os.path.isfile(path)]
|
||||
|
||||
@classmethod
|
||||
def save_extracted_text(cls, project_id: str, text: str, storage) -> None:
    """Upload a project's extracted text and record it in the database.

    The text is stored as UTF-8 at ``projects/<project_id>/extracted_text.txt``.
    If the project already has an ``extracted_text`` file record, its path and
    size are updated; otherwise a new record is inserted.

    Args:
        project_id: Project the text belongs to.
        text: Extracted text to store.
        storage: Storage backend providing ``upload``.
    """
    from sqlalchemy import select

    storage_path = f"projects/{project_id}/extracted_text.txt"
    # Encode once; the same bytes are uploaded and measured.
    payload = text.encode("utf-8")
    storage.upload(storage_path, payload, "text/plain")

    with get_session() as db:
        stmt = select(ProjectFileModel).where(
            ProjectFileModel.project_id == project_id,
            ProjectFileModel.file_type == "extracted_text",
        )
        existing = db.execute(stmt).scalar_one_or_none()
        if existing is not None:
            existing.storage_path = storage_path
            existing.size = len(payload)
        else:
            rec = ProjectFileModel(
                id=str(uuid.uuid4()),
                project_id=project_id,
                original_name="extracted_text.txt",
                storage_path=storage_path,
                size=len(payload),
                mime_type="text/plain",
                file_type="extracted_text",
            )
            db.add(rec)
        db.commit()
|
||||
|
||||
@classmethod
|
||||
def get_extracted_text(cls, project_id: str, storage) -> Optional[str]:
    """Return the project's extracted text decoded as UTF-8, or None when it is not stored."""
    key = f"projects/{project_id}/extracted_text.txt"
    if storage.exists(key):
        raw = storage.download(key)
        return raw.decode("utf-8")
    return None
|
||||
|
||||
@staticmethod
|
||||
def _to_dict(proj: ProjectModel) -> Dict[str, Any]:
|
||||
return {
|
||||
"id": proj.id,
|
||||
"project_id": proj.id, # compatibilitat amb codi existent
|
||||
"name": proj.name,
|
||||
"status": proj.status,
|
||||
"analysis_summary": proj.analysis_summary,
|
||||
"simulation_requirement": proj.simulation_requirement,
|
||||
"chunk_size": proj.chunk_size,
|
||||
"chunk_overlap": proj.chunk_overlap,
|
||||
"active_task_id": proj.active_task_id,
|
||||
"created_at": proj.created_at.isoformat(),
|
||||
"updated_at": proj.updated_at.isoformat(),
|
||||
# Camps llegits del model antic — ara buits per compatibilitat
|
||||
"files": [],
|
||||
"total_text_length": 0,
|
||||
"ontology": None,
|
||||
"graph_id": None,
|
||||
"graph_build_task_id": None,
|
||||
"error": None,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,80 @@
|
|||
# backend/tests/test_project_manager_db.py
|
||||
import io
|
||||
import pytest
|
||||
import tempfile
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from backend.app.db import Base
|
||||
import backend.app.db as db_module
|
||||
from backend.app.storage.local import LocalFSStorage
|
||||
import backend.app.models.db_models # ensure all ORM models are registered with Base.metadata
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def isolated_db(tmp_path):
    """Point the db module at a fresh in-memory SQLite database for each test.

    The module-level ``_engine`` and ``_SessionLocal`` are replaced for the
    duration of the test. Afterwards the tables are dropped, the temporary
    engine is disposed, and the previous values are put back.
    """
    # Remember what was installed so teardown does not clobber it.
    previous_engine = getattr(db_module, "_engine", None)
    previous_factory = getattr(db_module, "_SessionLocal", None)

    engine = create_engine("sqlite:///:memory:", connect_args={"check_same_thread": False})
    db_module._engine = engine
    db_module._SessionLocal = sessionmaker(bind=engine, autocommit=False, autoflush=False)
    Base.metadata.create_all(engine)

    yield

    Base.metadata.drop_all(engine)
    # Close pooled connections instead of leaving them for garbage collection.
    engine.dispose()
    db_module._engine = previous_engine
    db_module._SessionLocal = previous_factory
|
||||
|
||||
|
||||
@pytest.fixture
def storage(tmp_path):
    """Provide a LocalFSStorage built from the test's temporary directory."""
    root = str(tmp_path)
    return LocalFSStorage(root)
|
||||
|
||||
|
||||
def test_create_project(storage):
    """A new project reports its name, the "created" status, and an id."""
    from backend.app.models.project import ProjectManager

    created = ProjectManager.create_project("Test Project", storage=storage)

    assert created["name"] == "Test Project"
    assert created["status"] == "created"
    assert "id" in created
|
||||
|
||||
|
||||
def test_get_project(storage):
    """A created project can be fetched back by its id with the same name."""
    from backend.app.models.project import ProjectManager

    new_id = ProjectManager.create_project("My Project", storage=storage)["id"]
    loaded = ProjectManager.get_project(new_id)

    assert loaded is not None
    assert loaded["name"] == "My Project"
|
||||
|
||||
|
||||
def test_project_not_found(storage):
    """Fetching an id that was never created yields None."""
    from backend.app.models.project import ProjectManager

    missing = ProjectManager.get_project("nonexistent-id")

    assert missing is None
|
||||
|
||||
|
||||
def test_save_and_get_extracted_text(storage):
    """Extracted text saved for a project is read back unchanged."""
    from backend.app.models.project import ProjectManager

    project_id = ProjectManager.create_project("Text Project", storage=storage)["id"]
    ProjectManager.save_extracted_text(project_id, "hello extracted", storage=storage)

    loaded = ProjectManager.get_extracted_text(project_id, storage=storage)

    assert loaded == "hello extracted"
|
||||
|
||||
|
||||
def test_project_survives_manager_reset(storage):
    """The data must live in the database, not in memory."""
    from backend.app.models.project import ProjectManager

    new_id = ProjectManager.create_project("Persist Me", storage=storage)["id"]
    reloaded = ProjectManager.get_project(new_id)

    assert reloaded is not None
|
||||
|
||||
|
||||
def test_list_projects(storage):
    """Listing returns one entry per created project."""
    from backend.app.models.project import ProjectManager

    for project_name in ("P1", "P2"):
        ProjectManager.create_project(project_name, storage=storage)

    listed = ProjectManager.list_projects()

    assert len(listed) == 2
|
||||
|
||||
|
||||
def test_delete_project(storage):
    """Deleting an existing project returns True and makes it unfetchable."""
    from backend.app.models.project import ProjectManager

    project_id = ProjectManager.create_project("Del Me", storage=storage)["id"]

    deleted = ProjectManager.delete_project(project_id, storage=storage)

    assert deleted is True
    assert ProjectManager.get_project(project_id) is None
|
||||
Loading…
Reference in New Issue