system-prompts-and-models-o.../revolutionary_voice_agent.py
#!/usr/bin/env python3
"""
Revolutionary AI Agent System - The Most Advanced AI Agent with Voice Actor Capabilities
Integrates all workflow capabilities, AI tools, and agent frameworks with cutting-edge voice synthesis
"""
import os
import json
import asyncio
import sqlite3
import requests
import subprocess
import threading
import queue
import time
from datetime import datetime
from typing import Dict, List, Any, Optional, Union, Callable
from pathlib import Path
import uvicorn
from fastapi import FastAPI, HTTPException, BackgroundTasks, Request, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
import aiofiles
import aiofiles.os
from jinja2 import Environment, FileSystemLoader
import openai
import anthropic
import replicate
from dataclasses import dataclass
import logging
import numpy as np
import soundfile as sf
import librosa
from scipy import signal
import cv2
import mediapipe as mp
from transformers import pipeline, AutoTokenizer, AutoModel
import torch
import torchaudio
import whisper
import edge_tts
import pyttsx3
import speech_recognition as sr
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image, ImageDraw, ImageFont
import io
import base64
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Initialize FastAPI app
app = FastAPI(
    title="Revolutionary AI Agent System",
    description="The most advanced AI agent with voice actor capabilities",
    version="4.0.0"
)

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
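# NOTE: wide-open CORS (all origins, methods, and headers) is convenient for a
# local demo, but allow_origins should be restricted before deploying publicly.
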
# Configuration
CONFIG = {
    "openai_api_key": os.getenv("OPENAI_API_KEY"),
    "anthropic_api_key": os.getenv("ANTHROPIC_API_KEY"),
    "replicate_api_key": os.getenv("REPLICATE_API_KEY"),
    "elevenlabs_api_key": os.getenv("ELEVENLABS_API_KEY"),
    "database_path": "revolutionary_agent.db",
    "workflows_dir": "n8n-workflows/workflows",
    "tools_dir": ".",
    "max_concurrent_tasks": 20,
    "default_model": "gpt-4",
    "claude_model": "claude-3-sonnet-20240229",
    "voice_model": "eleven_multilingual_v2",
    "audio_sample_rate": 44100,
    "video_fps": 30,
    "max_audio_duration": 300,  # 5 minutes
    "max_video_duration": 600,  # 10 minutes
}
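# The API keys above are read from the environment; export them before starting
# the server, e.g.:
#   export ELEVENLABS_API_KEY=...   # required by the /api/voice/* endpoints
#   export OPENAI_API_KEY=...
#   export ANTHROPIC_API_KEY=...
#   export REPLICATE_API_KEY=...
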
# Data Models
class AgentTask(BaseModel):
    task_id: str
    task_type: str
    description: str
    parameters: Dict[str, Any] = {}
    priority: int = Field(default=1, ge=1, le=10)
    status: str = "pending"
    created_at: datetime = Field(default_factory=datetime.now)
    updated_at: datetime = Field(default_factory=datetime.now)


class AgentResponse(BaseModel):
    task_id: str
    status: str
    result: Optional[Dict[str, Any]] = None
    error: Optional[str] = None
    execution_time: float = 0.0


class VoiceActorRequest(BaseModel):
    text: str
    voice_id: str = "pNInz6obpgDQGcFmaJgB"  # Adam voice
    model_id: str = "eleven_multilingual_v2"
    voice_settings: Dict[str, Any] = {
        "stability": 0.5,
        "similarity_boost": 0.75,
        "style": 0.0,
        "use_speaker_boost": True
    }


class VideoGenerationRequest(BaseModel):
    prompt: str
    duration: int = 10
    resolution: str = "1920x1080"
    fps: int = 30
    style: str = "cinematic"
    voice_over: Optional[str] = None


@dataclass
class AgentCapability:
    name: str
    description: str
    category: str
    tools: List[str]
    models: List[str]
    workflows: List[str]
    voice_enabled: bool = False
class RevolutionaryAIAgent:
    """Revolutionary AI Agent with advanced voice actor capabilities"""

    def __init__(self):
        self.capabilities = self._load_capabilities()
        self.active_tasks = {}
        self.workflow_cache = {}
        self.tool_registry = self._load_tools()
        self.model_registry = self._load_models()
        self.voice_registry = self._load_voice_models()
        self.memory_system = self._init_memory_system()
        self.planning_system = self._init_planning_system()
        self.voice_processor = self._init_voice_processor()
        self.video_processor = self._init_video_processor()
        self.audio_queue = queue.Queue()
        self.video_queue = queue.Queue()

    def _load_capabilities(self) -> Dict[str, AgentCapability]:
        """Load all agent capabilities with voice actor features"""
        capabilities = {
            "voice_acting": AgentCapability(
                name="Voice Acting",
                description="Professional voice acting with emotional expression and character voices",
                category="multimodal",
                tools=["voice_synthesis", "emotion_analysis", "character_voice", "audio_processing"],
                models=["elevenlabs", "openai_tts", "whisper", "claude_3"],
                workflows=["voice_generation", "character_development", "script_analysis"],
                voice_enabled=True
            ),
            "video_production": AgentCapability(
                name="Video Production",
                description="Create professional videos with AI-generated content and voice-overs",
                category="multimodal",
                tools=["video_generation", "scene_composition", "audio_sync", "visual_effects"],
                models=["stable_diffusion", "runway_ml", "dall_e_3", "midjourney"],
                workflows=["video_script", "scene_planning", "post_production"],
                voice_enabled=True
            ),
            "character_development": AgentCapability(
                name="Character Development",
                description="Create and develop complex AI characters with unique personalities",
                category="ai_development",
                tools=["personality_engine", "memory_system", "behavior_modeling"],
                models=["gpt-4", "claude-3-sonnet", "anthropic_character"],
                workflows=["character_creation", "personality_training", "interaction_modeling"],
                voice_enabled=True
            ),
            "advanced_code_generation": AgentCapability(
                name="Advanced Code Generation",
                description="Generate complex, production-ready code with voice explanations",
                category="development",
                tools=["codebase_search", "file_read", "file_write", "run_terminal_cmd", "voice_code_explanation"],
                models=["gpt-4", "claude-3-sonnet", "codellama", "github_copilot"],
                workflows=["code_review", "bug_fix", "feature_implementation", "architecture_design"],
                voice_enabled=True
            ),
            "workflow_orchestration": AgentCapability(
                name="Workflow Orchestration",
                description="Orchestrate complex workflows with voice-guided execution",
                category="automation",
                tools=["workflow_executor", "api_integration", "data_processing", "voice_guidance"],
                models=["gpt-4", "claude-3-sonnet"],
                workflows=["data_pipeline", "api_automation", "notification_system", "voice_automation"],
                voice_enabled=True
            ),
            "multimodal_ai_analysis": AgentCapability(
                name="Multimodal AI Analysis",
                description="Analyze data, generate insights, and create reports with voice narration",
                category="analytics",
                tools=["data_analysis", "visualization", "report_generation", "voice_narration"],
                models=["gpt-4", "claude-3-sonnet", "dall-e-3", "whisper"],
                workflows=["data_analysis", "report_generation", "insight_extraction", "voice_presentation"],
                voice_enabled=True
            ),
            "system_integration": AgentCapability(
                name="System Integration",
                description="Integrate with external APIs and services with voice feedback",
                category="integration",
                tools=["api_client", "webhook_handler", "database_connector", "voice_status"],
                models=["gpt-4", "claude-3-sonnet"],
                workflows=["api_integration", "data_sync", "service_orchestration", "voice_monitoring"],
                voice_enabled=True
            )
        }
        return capabilities
    def _load_tools(self) -> Dict[str, Dict]:
        """Load all available tools including voice and video processing"""
        tools = {}
        # Load Cursor tools
        try:
            with open("Cursor Prompts/Agent Tools v1.0.json", "r") as f:
                cursor_tools = json.load(f)
                for tool in cursor_tools:
                    tools[f"cursor_{tool['name']}"] = tool
        except FileNotFoundError:
            logger.warning("Cursor tools not found")
        # Load Manus tools
        try:
            with open("Manus Agent Tools & Prompt/tools.json", "r") as f:
                manus_tools = json.load(f)
                for tool in manus_tools:
                    if "function" in tool:
                        tools[f"manus_{tool['function']['name']}"] = tool
        except FileNotFoundError:
            logger.warning("Manus tools not found")
        # Add revolutionary tools
        tools.update({
            "voice_synthesis": {
                "name": "voice_synthesis",
                "description": "Generate high-quality voice synthesis with emotional expression",
                "parameters": {
                    "text": {"type": "string"},
                    "voice_id": {"type": "string"},
                    "emotion": {"type": "string", "enum": ["happy", "sad", "angry", "calm", "excited", "professional"]},
                    "speed": {"type": "number", "minimum": 0.5, "maximum": 2.0},
                    "pitch": {"type": "number", "minimum": -20, "maximum": 20}
                }
            },
            "character_voice": {
                "name": "character_voice",
                "description": "Create and manage character voices with unique personalities",
                "parameters": {
                    "character_name": {"type": "string"},
                    "personality": {"type": "object"},
                    "voice_characteristics": {"type": "object"},
                    "dialogue": {"type": "string"}
                }
            },
            "video_generation": {
                "name": "video_generation",
                "description": "Generate professional videos with AI content and voice-overs",
                "parameters": {
                    "prompt": {"type": "string"},
                    "duration": {"type": "integer"},
                    "resolution": {"type": "string"},
                    "style": {"type": "string"},
                    "voice_over": {"type": "string"}
                }
            },
            "emotion_analysis": {
                "name": "emotion_analysis",
                "description": "Analyze emotional content in text, audio, and video",
                "parameters": {
                    "content": {"type": "string"},
                    "content_type": {"type": "string", "enum": ["text", "audio", "video"]},
                    "analysis_depth": {"type": "string", "enum": ["basic", "detailed", "comprehensive"]}
                }
            },
            "ai_model_caller": {
                "name": "ai_model_caller",
                "description": "Call various AI models for different tasks",
                "parameters": {
                    "model": {"type": "string"},
                    "prompt": {"type": "string"},
                    "parameters": {"type": "object"}
                }
            },
            "workflow_executor": {
                "name": "workflow_executor",
                "description": "Execute n8n workflows with voice-guided execution",
                "parameters": {
                    "workflow_name": {"type": "string"},
                    "input_data": {"type": "object"},
                    "execution_mode": {"type": "string", "enum": ["sync", "async", "streaming", "voice_guided"]}
                }
            }
        })
        return tools
    def _load_models(self) -> Dict[str, Dict]:
        """Load available AI models including voice and video models"""
        return {
            "gpt-4": {
                "provider": "openai",
                "capabilities": ["text", "code", "reasoning"],
                "max_tokens": 8192
            },
            "gpt-4-vision": {
                "provider": "openai",
                "capabilities": ["text", "image", "code", "reasoning"],
                "max_tokens": 4096
            },
            "claude-3-sonnet": {
                "provider": "anthropic",
                "capabilities": ["text", "code", "reasoning"],
                "max_tokens": 200000
            },
            "claude-3-sonnet-vision": {
                "provider": "anthropic",
                "capabilities": ["text", "image", "code", "reasoning"],
                "max_tokens": 200000
            },
            "elevenlabs": {
                "provider": "elevenlabs",
                "capabilities": ["voice_synthesis", "voice_cloning", "emotion_control"],
                "max_tokens": None
            },
            "whisper": {
                "provider": "openai",
                "capabilities": ["audio_transcription", "language_detection"],
                "max_tokens": None
            },
            "stable-diffusion": {
                "provider": "replicate",
                "capabilities": ["image_generation", "video_generation"],
                "max_tokens": None
            },
            "runway_ml": {
                "provider": "runway",
                "capabilities": ["video_generation", "video_editing"],
                "max_tokens": None
            }
        }

    def _load_voice_models(self) -> Dict[str, Dict]:
        """Load available voice models and characters"""
        return {
            "eleven_multilingual_v2": {
                "provider": "elevenlabs",
                "languages": ["en", "es", "fr", "de", "it", "pt", "pl", "hi", "ja", "ko", "zh"],
                "emotions": ["happy", "sad", "angry", "calm", "excited", "professional"],
                "voices": {
                    "adam": "pNInz6obpgDQGcFmaJgB",
                    "bella": "EXAVITQu4vr4xnSDxMaL",
                    "charlie": "VR6AewLTigWG4xSOukaG",
                    "diana": "21m00Tcm4TlvDq8ikWAM",
                    "eve": "AZnzlk1XvdvUeBnXmlld"
                }
            },
            "openai_tts": {
                "provider": "openai",
                "voices": ["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
                "formats": ["mp3", "opus", "aac", "flac"]
            },
            "edge_tts": {
                "provider": "microsoft",
                "voices": ["en-US-JennyNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
                "languages": ["en-US", "en-GB", "es-ES", "fr-FR", "de-DE"]
            }
        }
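    # Example lookup against the registry above (illustrative): the "adam"
    # preset resolves to voice id "pNInz6obpgDQGcFmaJgB" via
    #   agent.voice_registry["eleven_multilingual_v2"]["voices"]["adam"]
    # and is also the default voice_id on VoiceActorRequest.
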
    def _init_memory_system(self):
        """Initialize advanced memory system"""
        return {
            "short_term": [],
            "long_term": {},
            "character_memories": {},
            "conversation_history": [],
            "task_memory": {}
        }

    def _init_planning_system(self):
        """Initialize advanced planning system"""
        return {
            "current_plan": None,
            "task_queue": [],
            "execution_history": [],
            "planning_engine": "advanced"
        }

    def _init_voice_processor(self):
        """Initialize voice processing capabilities"""
        return {
            "synthesis_engine": "elevenlabs",
            "recognition_engine": "whisper",
            "emotion_analyzer": "advanced",
            "character_voices": {},
            "audio_cache": {}
        }

    def _init_video_processor(self):
        """Initialize video processing capabilities"""
        return {
            "generation_engine": "stable_diffusion",
            "editing_engine": "moviepy",
            "composition_engine": "advanced",
            "video_cache": {},
            "scene_templates": {}
        }
# Initialize the agent
agent = RevolutionaryAIAgent()
# API Routes
@app.get("/", response_class=HTMLResponse)
async def root():
"""Main dashboard for the Revolutionary AI Agent"""
html_content = """
<!DOCTYPE html>
<html>
<head>
<title>Revolutionary AI Agent System</title>
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
margin: 0;
padding: 0;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
min-height: 100vh;
}
.container {
max-width: 1400px;
margin: 0 auto;
padding: 20px;
}
.header {
text-align: center;
margin-bottom: 40px;
padding: 40px 0;
}
.header h1 {
font-size: 3.5em;
margin: 0;
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
background: linear-gradient(45deg, #ff6b6b, #4ecdc4, #45b7d1, #96ceb4);
background-size: 400% 400%;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
animation: gradient 3s ease infinite;
}
@keyframes gradient {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
            .capabilities {
                display: grid;
                grid-template-columns: repeat(auto-fit, minmax(350px, 1fr));
                gap: 25px;
                margin-bottom: 40px;
            }
            .capability {
                background: rgba(255,255,255,0.1);
                padding: 25px;
                border-radius: 15px;
                backdrop-filter: blur(10px);
                border: 1px solid rgba(255,255,255,0.2);
                transition: transform 0.3s ease, box-shadow 0.3s ease;
            }
            .capability:hover {
                transform: translateY(-5px);
                box-shadow: 0 10px 30px rgba(0,0,0,0.3);
            }
            .capability h3 {
                color: #fff;
                margin-top: 0;
                font-size: 1.5em;
                margin-bottom: 15px;
            }
            .capability p {
                line-height: 1.6;
                margin-bottom: 15px;
            }
            .capability ul {
                list-style: none;
                padding: 0;
            }
            .capability li {
                padding: 5px 0;
                position: relative;
                padding-left: 20px;
            }
            .capability li:before {
                content: "";
                position: absolute;
                left: 0;
            }
            .status {
                padding: 15px;
                border-radius: 10px;
                margin: 20px 0;
                text-align: center;
                font-weight: bold;
            }
            .status.success {
                background: rgba(76, 175, 80, 0.2);
                border: 1px solid rgba(76, 175, 80, 0.5);
            }
            .status.info {
                background: rgba(33, 150, 243, 0.2);
                border: 1px solid rgba(33, 150, 243, 0.5);
            }
            .voice-demo {
                background: rgba(255,255,255,0.1);
                padding: 20px;
                border-radius: 15px;
                margin: 20px 0;
                text-align: center;
            }
            .voice-demo button {
                background: linear-gradient(45deg, #ff6b6b, #4ecdc4);
                border: none;
                padding: 15px 30px;
                border-radius: 25px;
                color: white;
                font-size: 1.1em;
                cursor: pointer;
                margin: 10px;
                transition: transform 0.2s ease;
            }
            .voice-demo button:hover {
                transform: scale(1.05);
            }
        </style>
    </head>
    <body>
        <div class="container">
            <div class="header">
                <h1>🎭 Revolutionary AI Agent System</h1>
                <p style="font-size: 1.3em; margin-top: 10px;">The Most Advanced AI Agent with Professional Voice Actor Capabilities</p>
            </div>
            <div class="status success">
                <strong>🚀 System Status:</strong> Online and ready to revolutionize AI interaction
            </div>
            <div class="voice-demo">
                <h3>🎤 Voice Actor Demo</h3>
                <p>Experience the revolutionary voice capabilities</p>
                <button onclick="testVoice()">Test Voice Synthesis</button>
                <button onclick="testCharacter()">Test Character Voice</button>
                <button onclick="testVideo()">Generate Video with Voice</button>
            </div>
            <h2 style="text-align: center; margin: 40px 0;">Revolutionary Capabilities</h2>
            <div class="capabilities">
                <div class="capability">
                    <h3>🎭 Voice Acting</h3>
                    <p>Professional voice acting with emotional expression and character voices</p>
                    <ul>
                        <li>Multi-language voice synthesis</li>
                        <li>Emotional expression control</li>
                        <li>Character voice development</li>
                        <li>Real-time voice generation</li>
                    </ul>
                </div>
                <div class="capability">
                    <h3>🎬 Video Production</h3>
                    <p>Create professional videos with AI-generated content and voice-overs</p>
                    <ul>
                        <li>AI video generation</li>
                        <li>Professional voice-overs</li>
                        <li>Scene composition</li>
                        <li>Visual effects integration</li>
                    </ul>
                </div>
                <div class="capability">
                    <h3>👤 Character Development</h3>
                    <p>Create and develop complex AI characters with unique personalities</p>
                    <ul>
                        <li>Personality engine</li>
                        <li>Memory systems</li>
                        <li>Behavior modeling</li>
                        <li>Character voice training</li>
                    </ul>
                </div>
                <div class="capability">
                    <h3>💻 Advanced Code Generation</h3>
                    <p>Generate complex, production-ready code with voice explanations</p>
                    <ul>
                        <li>Multi-language support</li>
                        <li>Voice code explanations</li>
                        <li>Architecture design</li>
                        <li>Real-time collaboration</li>
                    </ul>
                </div>
                <div class="capability">
                    <h3>⚙️ Workflow Orchestration</h3>
                    <p>Orchestrate complex workflows with voice-guided execution</p>
                    <ul>
                        <li>Voice-guided automation</li>
                        <li>Complex workflow execution</li>
                        <li>Real-time monitoring</li>
                        <li>Intelligent decision making</li>
                    </ul>
                </div>
                <div class="capability">
                    <h3>📊 Multimodal AI Analysis</h3>
                    <p>Analyze data, generate insights, and create reports with voice narration</p>
                    <ul>
                        <li>Voice-narrated reports</li>
                        <li>Data visualization</li>
                        <li>Predictive analytics</li>
                        <li>Interactive presentations</li>
                    </ul>
                </div>
            </div>
            <div class="status info">
                <strong>🔗 API Documentation:</strong> Available at <a href="/docs" style="color: #fff;">/docs</a>
            </div>
        </div>
        <script>
            async function testVoice() {
                const response = await fetch('/api/voice/synthesize', {
                    method: 'POST',
                    headers: {'Content-Type': 'application/json'},
                    body: JSON.stringify({
                        text: "Hello! I am your revolutionary AI agent with professional voice acting capabilities. Welcome to the future of AI interaction!",
                        voice_id: "pNInz6obpgDQGcFmaJgB",
                        emotion: "excited"
                    })
                });
                const audioBlob = await response.blob();
                const audio = new Audio(URL.createObjectURL(audioBlob));
                audio.play();
            }

            async function testCharacter() {
                const response = await fetch('/api/voice/character', {
                    method: 'POST',
                    headers: {'Content-Type': 'application/json'},
                    body: JSON.stringify({
                        character_name: "Professor AI",
                        dialogue: "Greetings! I am Professor AI, your knowledgeable companion. Together, we shall explore the frontiers of artificial intelligence and create something truly extraordinary!",
                        personality: {
                            "traits": ["intelligent", "enthusiastic", "helpful"],
                            "speaking_style": "academic but friendly"
                        }
                    })
                });
                const audioBlob = await response.blob();
                const audio = new Audio(URL.createObjectURL(audioBlob));
                audio.play();
            }

            // NOTE: /api/video/generate currently returns a JSON placeholder
            // (see the endpoint below), so this demo will not render a playable
            // video until real video generation is wired up.
            async function testVideo() {
                const response = await fetch('/api/video/generate', {
                    method: 'POST',
                    headers: {'Content-Type': 'application/json'},
                    body: JSON.stringify({
                        prompt: "A futuristic AI laboratory with holographic displays and floating screens",
                        duration: 10,
                        voice_over: "Welcome to the future of AI research and development. Here, we push the boundaries of what's possible."
                    })
                });
                const videoBlob = await response.blob();
                const video = document.createElement('video');
                video.src = URL.createObjectURL(videoBlob);
                video.controls = true;
                video.style.width = '100%';
                video.style.maxWidth = '600px';
                video.style.margin = '20px auto';
                video.style.display = 'block';
                document.querySelector('.voice-demo').appendChild(video);
            }
        </script>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)
@app.post("/api/voice/synthesize")
async def synthesize_voice(request: VoiceActorRequest):
"""Synthesize voice using ElevenLabs API"""
try:
if not CONFIG["elevenlabs_api_key"]:
raise HTTPException(status_code=500, detail="ElevenLabs API key not configured")
url = f"https://api.elevenlabs.io/v1/text-to-speech/{request.voice_id}"
headers = {
"Accept": "audio/mpeg",
"Content-Type": "application/json",
"xi-api-key": CONFIG["elevenlabs_api_key"]
}
data = {
"text": request.text,
"model_id": request.model_id,
"voice_settings": request.voice_settings
}
response = requests.post(url, headers=headers, json=data)
if response.status_code == 200:
return StreamingResponse(
io.BytesIO(response.content),
media_type="audio/mpeg",
headers={"Content-Disposition": "attachment; filename=voice.mp3"}
)
else:
raise HTTPException(status_code=response.status_code, detail="Voice synthesis failed")
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
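# Example client call for the endpoint above (a sketch: assumes the server is
# running locally on port 8000 and ELEVENLABS_API_KEY is set):
#
#   import requests
#   r = requests.post(
#       "http://localhost:8000/api/voice/synthesize",
#       json={"text": "Hello from the Revolutionary AI Agent!",
#             "voice_id": "pNInz6obpgDQGcFmaJgB"},
#   )
#   with open("voice.mp3", "wb") as f:
#       f.write(r.content)
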
@app.post("/api/voice/character")
async def create_character_voice(request: Dict[str, Any]):
"""Create a character voice with specific personality"""
try:
character_name = request.get("character_name", "Character")
dialogue = request.get("dialogue", "Hello, I am a character.")
personality = request.get("personality", {"traits": ["friendly"], "speaking_style": "neutral"})
# Analyze personality and adjust voice parameters
voice_settings = _analyze_personality_for_voice(personality)
# Generate character-specific voice
voice_request = VoiceActorRequest(
text=dialogue,
voice_id=voice_settings["voice_id"],
voice_settings={
"stability": 0.5,
"similarity_boost": 0.75,
"style": 0.0,
"use_speaker_boost": True
}
)
return await synthesize_voice(voice_request)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
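# Example request body for /api/voice/character (mirrors the dashboard demo):
#   {
#       "character_name": "Professor AI",
#       "dialogue": "Greetings! I am Professor AI, your knowledgeable companion.",
#       "personality": {"traits": ["intelligent"], "speaking_style": "academic but friendly"}
#   }
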
@app.post("/api/video/generate")
async def generate_video(request: VideoGenerationRequest):
"""Generate video with AI content and voice-over"""
try:
# This would integrate with video generation APIs
# For now, return a placeholder response
return {
"status": "processing",
"video_id": f"video_{int(time.time())}",
"message": "Video generation started. This feature requires additional setup."
}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/capabilities")
async def get_capabilities():
"""Get all available agent capabilities"""
return {
"capabilities": {
name: {
"name": cap.name,
"description": cap.description,
"category": cap.category,
"tools": cap.tools,
"models": cap.models,
"workflows": cap.workflows,
"voice_enabled": cap.voice_enabled
}
for name, cap in agent.capabilities.items()
}
}
@app.get("/api/tools")
async def get_tools():
"""Get all available tools"""
return {"tools": agent.tool_registry}
@app.get("/api/models")
async def get_models():
"""Get all available AI models"""
return {"models": agent.model_registry}
@app.get("/api/voice/models")
async def get_voice_models():
"""Get all available voice models"""
return {"voice_models": agent.voice_registry}
@app.get("/api/health")
async def health_check():
"""Health check endpoint"""
return {
"status": "healthy",
"timestamp": datetime.now().isoformat(),
"capabilities_count": len(agent.capabilities),
"tools_count": len(agent.tool_registry),
"models_count": len(agent.model_registry),
"voice_models_count": len(agent.voice_registry)
}
def _analyze_personality_for_voice(personality: Dict) -> Dict:
    """Analyze personality traits to determine voice characteristics"""
    traits = personality.get("traits", [])
    speaking_style = personality.get("speaking_style", "neutral")
    # Map personality to voice settings
    voice_mapping = {
        "intelligent": {"voice_id": "pNInz6obpgDQGcFmaJgB", "emotion": "professional"},
        "friendly": {"voice_id": "EXAVITQu4vr4xnSDxMaL", "emotion": "happy"},
        "serious": {"voice_id": "VR6AewLTigWG4xSOukaG", "emotion": "calm"},
        "energetic": {"voice_id": "21m00Tcm4TlvDq8ikWAM", "emotion": "excited"}
    }
    # Default to intelligent/professional
    return voice_mapping.get(traits[0] if traits else "intelligent",
                             voice_mapping["intelligent"])
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)
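# To start the server (assuming this module is saved as
# revolutionary_voice_agent.py, per the filename above):
#   python revolutionary_voice_agent.py
# or equivalently:
#   uvicorn revolutionary_voice_agent:app --host 0.0.0.0 --port 8000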