@app.get("/api/workflows/{filename}/download")
async def download_workflow(filename: str):
    """Download workflow JSON file.

    Args:
        filename: Name of the workflow file; it is joined onto the
            ``workflows`` directory to build the path that is served.

    Returns:
        A ``FileResponse`` for the file with media type ``application/json``
        and the same download filename.

    Raises:
        HTTPException: 404 when the file does not exist, 500 for any other
            unexpected error.
    """
    # NOTE(review): `filename` comes straight from the URL and is joined into
    # a filesystem path without validation -- confirm that names containing
    # path separators or ".." cannot reach this handler.
    try:
        file_path = os.path.join("workflows", filename)
        if not os.path.exists(file_path):
            print(f"Warning: Download requested for missing file: {file_path}")
            raise HTTPException(status_code=404, detail=f"Workflow file '{filename}' not found on filesystem")

        return FileResponse(
            file_path,
            media_type="application/json",
            filename=filename
        )
    except HTTPException:
        # HTTPException is itself an Exception. Without this clause the 404
        # raised above would be caught by the generic handler below and be
        # reported to the client as a 500.
        raise
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"Workflow file '{filename}' not found")
    except Exception as e:
        print(f"Error downloading workflow {filename}: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error downloading workflow: {str(e)}")
#!/usr/bin/env python3
"""
Script to clean up the database by removing orphaned workflows
(workflows that exist in database but not on filesystem)

Commands (first command-line argument):
    cleanup - remove database rows whose workflow file no longer exists
    missing - list workflow files that have no database row
    stats   - print database statistics (also the default)
"""

import os
import sqlite3
from pathlib import Path

# Both paths are relative to the current working directory.
DB_PATH = "workflows.db"
WORKFLOWS_DIR = "workflows"

# Number of filenames bound per DELETE statement. SQLite limits how many
# parameters one statement may bind (999 in builds older than 3.32.0), so an
# unbounded "IN (?, ?, ...)" list fails once enough rows are orphaned.
DELETE_BATCH_SIZE = 500

# Number of filenames listed before the remainder is summarised.
PREVIEW_LIMIT = 10


def _open_database():
    """Connect to the workflow database, or report that it is missing.

    Returns:
        An open ``sqlite3.Connection``, or ``None`` (after printing a
        message) when the database file does not exist.
    """
    if not os.path.exists(DB_PATH):
        print("โŒ Database not found. Run the API server first to create the database.")
        return None
    return sqlite3.connect(DB_PATH)


def _workflow_files_on_disk():
    """List the ``*.json`` file names in the workflows directory.

    Returns:
        A list of file names, or ``None`` (after printing a message) when
        the workflows directory does not exist.
    """
    workflows_dir = Path(WORKFLOWS_DIR)
    if not workflows_dir.exists():
        print("โŒ Workflows directory not found.")
        return None
    return [file_path.name for file_path in workflows_dir.glob("*.json")]


def _print_preview(filenames):
    """Print the first ``PREVIEW_LIMIT`` names and a count of the rest."""
    for i, filename in enumerate(filenames[:PREVIEW_LIMIT], 1):
        print(f" {i}. {filename}")

    if len(filenames) > PREVIEW_LIMIT:
        print(f" ... and {len(filenames) - PREVIEW_LIMIT} more")


def cleanup_orphaned_workflows():
    """Remove workflow entries from database that don't have corresponding files.

    Lists the orphaned rows, asks for confirmation on stdin, then deletes
    them from both the ``workflows`` and ``workflows_fts`` tables in a single
    transaction. Any error is printed and the transaction is rolled back.
    """
    conn = _open_database()
    if conn is None:
        return

    cursor = conn.cursor()

    try:
        # Get all workflow filenames from database
        cursor.execute("SELECT filename FROM workflows")
        db_workflows = [row[0] for row in cursor.fetchall()]

        # Get all actual workflow files from filesystem
        files_on_disk = _workflow_files_on_disk()
        if files_on_disk is None:
            return
        actual_files = set(files_on_disk)

        # Find orphaned workflows (in database but not on filesystem)
        orphaned = [name for name in db_workflows if name not in actual_files]

        if not orphaned:
            print("โœ… No orphaned workflows found. Database is clean!")
            return

        print(f"๐Ÿงน Found {len(orphaned)} orphaned workflows in database:")
        _print_preview(orphaned)

        # Ask for confirmation
        response = input(f"\nโ“ Remove {len(orphaned)} orphaned workflows from database? (y/N): ")
        if response.strip().lower() not in ['y', 'yes']:
            print("โŒ Operation cancelled.")
            return

        # Remove orphaned workflows in batches so no single statement binds
        # more parameters than SQLite allows. Nothing is committed until
        # every batch has succeeded.
        for start in range(0, len(orphaned), DELETE_BATCH_SIZE):
            batch = orphaned[start:start + DELETE_BATCH_SIZE]
            placeholders = ','.join('?' * len(batch))
            cursor.execute(f"DELETE FROM workflows WHERE filename IN ({placeholders})", batch)

            # Also remove from FTS table
            cursor.execute(f"DELETE FROM workflows_fts WHERE filename IN ({placeholders})", batch)

        conn.commit()
        print(f"โœ… Removed {len(orphaned)} orphaned workflows from database.")

        # Show updated stats
        cursor.execute("SELECT COUNT(*) FROM workflows")
        total_count = cursor.fetchone()[0]
        print(f"๐Ÿ“Š Database now contains {total_count} workflows.")

    except Exception as e:
        # Best-effort CLI tool: report the problem and undo partial deletes.
        print(f"โŒ Error cleaning database: {e}")
        conn.rollback()
    finally:
        conn.close()


def find_missing_workflows():
    """Find workflow files that exist on filesystem but not in database.

    Prints the files that have no row in the ``workflows`` table, followed
    by a hint on how to reindex. The database is only read, never modified.
    """
    conn = _open_database()
    if conn is None:
        return

    cursor = conn.cursor()

    try:
        # Get all workflow filenames from database
        cursor.execute("SELECT filename FROM workflows")
        db_workflows = {row[0] for row in cursor.fetchall()}

        # Get all actual workflow files from filesystem
        actual_files = _workflow_files_on_disk()
        if actual_files is None:
            return

        # Find missing workflows (on filesystem but not in database); sorted
        # so the listing does not depend on directory iteration order.
        missing = sorted(name for name in actual_files if name not in db_workflows)

        if not missing:
            print("โœ… All workflow files are indexed in database!")
            return

        print(f"๐Ÿ“ Found {len(missing)} workflow files not in database:")
        _print_preview(missing)

        print("\n๐Ÿ’ก Run 'curl -X POST http://localhost:8000/api/reindex?force=true' to reindex all workflows.")

    except Exception as e:
        print(f"โŒ Error checking for missing workflows: {e}")
    finally:
        conn.close()


def show_database_stats():
    """Show current database statistics.

    Prints the total, active and inactive workflow counts, the number of
    ``*.json`` files on disk (0 when the directory is missing), a warning
    when the two totals differ, and the row count per trigger type.
    """
    conn = _open_database()
    if conn is None:
        return

    cursor = conn.cursor()

    try:
        # Get total workflows
        cursor.execute("SELECT COUNT(*) FROM workflows")
        total = cursor.fetchone()[0]

        # Get active/inactive counts
        cursor.execute("SELECT COUNT(*) FROM workflows WHERE active = 1")
        active = cursor.fetchone()[0]
        inactive = total - active

        # Get trigger type distribution
        cursor.execute("SELECT trigger_type, COUNT(*) FROM workflows GROUP BY trigger_type ORDER BY COUNT(*) DESC")
        triggers = cursor.fetchall()

        # Show filesystem stats; a missing directory counts as zero files
        # here rather than being reported as an error.
        workflows_dir = Path(WORKFLOWS_DIR)
        if workflows_dir.exists():
            actual_files = sum(1 for _ in workflows_dir.glob("*.json"))
        else:
            actual_files = 0

        print("๐Ÿ“Š Database Statistics:")
        print(f" Total workflows in DB: {total}")
        print(f" Active workflows: {active}")
        print(f" Inactive workflows: {inactive}")
        print(f" Files on filesystem: {actual_files}")

        if total != actual_files:
            print(f" โš ๏ธ Database/filesystem mismatch: {abs(total - actual_files)} difference")

        print("\n๐ŸŽฏ Trigger Types:")
        for trigger_type, count in triggers:
            print(f" {trigger_type}: {count}")

    except Exception as e:
        print(f"โŒ Error getting database stats: {e}")
    finally:
        conn.close()


# Maps each command-line command to the function that implements it.
_COMMANDS = {
    "cleanup": cleanup_orphaned_workflows,
    "missing": find_missing_workflows,
    "stats": show_database_stats,
}


def main(argv=None):
    """Run the command named by the first argument, or show stats by default.

    Args:
        argv: Arguments after the program name. Defaults to ``sys.argv[1:]``.
    """
    import sys

    args = sys.argv[1:] if argv is None else argv

    if args:
        handler = _COMMANDS.get(args[0].lower())
        if handler is None:
            print("โŒ Unknown command. Use: cleanup, missing, or stats")
        else:
            handler()
    else:
        print("๐Ÿงน Database Cleanup Tool")
        print("\nAvailable commands:")
        print(" python3 cleanup_database.py cleanup - Remove orphaned workflows from database")
        print(" python3 cleanup_database.py missing - Find workflows missing from database")
        print(" python3 cleanup_database.py stats - Show database statistics")
        print("\nRunning stats by default...\n")
        show_database_stats()


if __name__ == "__main__":
    main()