"""Project context management. Persists project state on the server so the frontend does not have to round-trip large blobs of context between API calls. """ import os import json import uuid import shutil from datetime import datetime from typing import Dict, Any, List, Optional from enum import Enum from dataclasses import dataclass, field, asdict from ..config import Config class ProjectStatus(str, Enum): """Project lifecycle status.""" CREATED = "created" # just created, files uploaded ONTOLOGY_GENERATED = "ontology_generated" # ontology has been generated GRAPH_BUILDING = "graph_building" # graph build in progress GRAPH_COMPLETED = "graph_completed" # graph build finished FAILED = "failed" # build failed @dataclass class Project: """Project data model.""" project_id: str name: str status: ProjectStatus created_at: str updated_at: str # File information files: List[Dict[str, str]] = field(default_factory=list) # [{filename, path, size}] total_text_length: int = 0 # Ontology information (filled in after step 1 generates it) ontology: Optional[Dict[str, Any]] = None analysis_summary: Optional[str] = None # Graph information (filled in after step 2 finishes) graph_id: Optional[str] = None graph_build_task_id: Optional[str] = None # Configuration simulation_requirement: Optional[str] = None chunk_size: int = 500 chunk_overlap: int = 50 # Error message when status == FAILED error: Optional[str] = None def to_dict(self) -> Dict[str, Any]: """Serialize the project to a JSON-friendly dict.""" return { "project_id": self.project_id, "name": self.name, "status": self.status.value if isinstance(self.status, ProjectStatus) else self.status, "created_at": self.created_at, "updated_at": self.updated_at, "files": self.files, "total_text_length": self.total_text_length, "ontology": self.ontology, "analysis_summary": self.analysis_summary, "graph_id": self.graph_id, "graph_build_task_id": self.graph_build_task_id, "simulation_requirement": self.simulation_requirement, "chunk_size": self.chunk_size, "chunk_overlap": self.chunk_overlap, "error": self.error } @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'Project': """Reconstruct a project from its serialized dict.""" status = data.get('status', 'created') if isinstance(status, str): status = ProjectStatus(status) return cls( project_id=data['project_id'], name=data.get('name', 'Unnamed Project'), status=status, created_at=data.get('created_at', ''), updated_at=data.get('updated_at', ''), files=data.get('files', []), total_text_length=data.get('total_text_length', 0), ontology=data.get('ontology'), analysis_summary=data.get('analysis_summary'), graph_id=data.get('graph_id'), graph_build_task_id=data.get('graph_build_task_id'), simulation_requirement=data.get('simulation_requirement'), chunk_size=data.get('chunk_size', 500), chunk_overlap=data.get('chunk_overlap', 50), error=data.get('error') ) class ProjectManager: """Project manager: handles persistence and retrieval of projects on disk.""" # Root directory for project storage PROJECTS_DIR = os.path.join(Config.UPLOAD_FOLDER, 'projects') @classmethod def _ensure_projects_dir(cls): """Ensure the projects root directory exists.""" os.makedirs(cls.PROJECTS_DIR, exist_ok=True) @classmethod def _get_project_dir(cls, project_id: str) -> str: """Return the on-disk directory for a project.""" return os.path.join(cls.PROJECTS_DIR, project_id) @classmethod def _get_project_meta_path(cls, project_id: str) -> str: """Return the path to a project's metadata JSON file.""" return os.path.join(cls._get_project_dir(project_id), 'project.json') @classmethod def _get_project_files_dir(cls, project_id: str) -> str: """Return the directory where project source files are stored.""" return os.path.join(cls._get_project_dir(project_id), 'files') @classmethod def _get_project_text_path(cls, project_id: str) -> str: """Return the path to a project's extracted text file.""" return os.path.join(cls._get_project_dir(project_id), 'extracted_text.txt') @classmethod def create_project(cls, name: str = "Unnamed Project") -> Project: """Create a new project. Args: name: Display name for the project. Returns: The newly created ``Project`` instance. """ cls._ensure_projects_dir() project_id = f"proj_{uuid.uuid4().hex[:12]}" now = datetime.now().isoformat() project = Project( project_id=project_id, name=name, status=ProjectStatus.CREATED, created_at=now, updated_at=now ) # Create the on-disk project directory layout project_dir = cls._get_project_dir(project_id) files_dir = cls._get_project_files_dir(project_id) os.makedirs(project_dir, exist_ok=True) os.makedirs(files_dir, exist_ok=True) # Persist project metadata cls.save_project(project) return project @classmethod def save_project(cls, project: Project) -> None: """Persist project metadata to disk.""" project.updated_at = datetime.now().isoformat() meta_path = cls._get_project_meta_path(project.project_id) with open(meta_path, 'w', encoding='utf-8') as f: json.dump(project.to_dict(), f, ensure_ascii=False, indent=2) @classmethod def get_project(cls, project_id: str) -> Optional[Project]: """Load a project by id. Args: project_id: Project identifier. Returns: The ``Project`` if it exists, otherwise ``None``. """ meta_path = cls._get_project_meta_path(project_id) if not os.path.exists(meta_path): return None with open(meta_path, 'r', encoding='utf-8') as f: data = json.load(f) return Project.from_dict(data) @classmethod def list_projects(cls, limit: int = 50) -> List[Project]: """List existing projects, newest first. Args: limit: Maximum number of projects to return. Returns: Projects ordered by ``created_at`` descending. """ cls._ensure_projects_dir() projects = [] for project_id in os.listdir(cls.PROJECTS_DIR): project = cls.get_project(project_id) if project: projects.append(project) projects.sort(key=lambda p: p.created_at, reverse=True) return projects[:limit] @classmethod def delete_project(cls, project_id: str) -> bool: """Delete a project and all of its files. Args: project_id: Project identifier. Returns: ``True`` if the project existed and was removed, ``False`` otherwise. """ project_dir = cls._get_project_dir(project_id) if not os.path.exists(project_dir): return False shutil.rmtree(project_dir) return True @classmethod def save_file_to_project(cls, project_id: str, file_storage, original_filename: str) -> Dict[str, str]: """Save an uploaded file under the project's files directory. Args: project_id: Project identifier. file_storage: Flask ``FileStorage`` object from the request. original_filename: The user-supplied filename. Returns: Dict describing the saved file: ``{original_filename, saved_filename, path, size}``. """ files_dir = cls._get_project_files_dir(project_id) os.makedirs(files_dir, exist_ok=True) # Generate a safe randomized filename to avoid collisions ext = os.path.splitext(original_filename)[1].lower() safe_filename = f"{uuid.uuid4().hex[:8]}{ext}" file_path = os.path.join(files_dir, safe_filename) file_storage.save(file_path) file_size = os.path.getsize(file_path) return { "original_filename": original_filename, "saved_filename": safe_filename, "path": file_path, "size": file_size } @classmethod def save_extracted_text(cls, project_id: str, text: str) -> None: """Persist the project's extracted full text to disk.""" text_path = cls._get_project_text_path(project_id) with open(text_path, 'w', encoding='utf-8') as f: f.write(text) @classmethod def get_extracted_text(cls, project_id: str) -> Optional[str]: """Read back the project's extracted full text, or ``None`` if absent.""" text_path = cls._get_project_text_path(project_id) if not os.path.exists(text_path): return None with open(text_path, 'r', encoding='utf-8') as f: return f.read() @classmethod def get_project_files(cls, project_id: str) -> List[str]: """Return the on-disk paths of all files in the project.""" files_dir = cls._get_project_files_dir(project_id) if not os.path.exists(files_dir): return [] return [ os.path.join(files_dir, f) for f in os.listdir(files_dir) if os.path.isfile(os.path.join(files_dir, f)) ]