"""Audio API routes: text-to-speech, speech-to-text and chatbot endpoints."""

import io
import logging

from fastapi import APIRouter, UploadFile, File, HTTPException
from fastapi.responses import StreamingResponse

from config import ALLOWED_AUDIO_TYPES, MAX_AUDIO_SIZE
from services.stt_service import speech_to_text
from services.tts_service import generate_tts
from services.chatbot_service import get_chatbot_response
from models.audio import STTResponse, TTSRequest, TTSResponse, ChatbotRequest, ChatbotResponse

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/audio", tags=["Audio"])

# Model name reported in the STT and text-chatbot response payloads.
_MODEL_NAME = "gemini-2.5-flash"


def _require_supported_audio(file: UploadFile) -> None:
    """Raise a 400 HTTPException when the upload's content type is not allowed."""
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A",
        )


async def _read_audio_within_limit(file: UploadFile) -> bytes:
    """Read the whole upload and raise a 400 HTTPException if it exceeds MAX_AUDIO_SIZE."""
    audio_bytes = await file.read()
    if len(audio_bytes) > MAX_AUDIO_SIZE:
        raise HTTPException(
            status_code=400,
            detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB",
        )
    return audio_bytes


@router.post("/tts")
async def tts(request: TTSRequest):
    """
    Convert text to speech and return audio file.

    Example:
    - POST /audio/tts
    - Body: {"text": "Hello, welcome to our system"}
    - Returns: WAV audio file

    Errors:
    - 500: any failure while generating the audio
    """
    try:
        logger.info("TTS request received for text: '%s'", request.text)
        audio_bytes = await generate_tts(request.text)
        return StreamingResponse(io.BytesIO(audio_bytes), media_type="audio/wav")
    except HTTPException:
        # Deliberate HTTP errors must reach the client with their own status.
        raise
    except Exception as exc:
        logger.exception("TTS error: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc


@router.post("/stt", response_model=STTResponse)
async def stt(file: UploadFile = File(...)):
    """
    Convert audio file to text.

    Example:
    - POST /audio/stt
    - File: audio.mp3 (or .wav, .m4a)
    - Returns: {"text": "transcribed text", "model_name": "gemini-2.5-flash", ...}

    Errors:
    - 400: unsupported content type, or file larger than the configured maximum
    - 500: any failure while transcribing
    """
    # Validate file type
    _require_supported_audio(file)

    try:
        logger.info("STT request received for file: %s", file.filename)
        # Check file size while reading
        audio_bytes = await _read_audio_within_limit(file)

        text = await speech_to_text(audio_bytes, file.filename)
        return STTResponse(
            text=text,
            model_name=_MODEL_NAME,
            language="en",
            duration_seconds=None,
        )
    except HTTPException:
        # Without this, the 400 "too large" error raised above would be
        # swallowed by the generic handler and reported as a 500.
        raise
    except Exception as exc:
        logger.exception("STT error: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc


@router.post("/chatbot")
async def chatbot_voice(file: UploadFile = File(...)):
    """
    Full voice chatbot flow (Audio → Text → Response → Audio).

    Example:
    - POST /audio/chatbot
    - File: user_voice.mp3
    - Returns: Response audio file (WAV)

    Process:
    1. Converts user's audio to text (STT)
    2. Generates chatbot response to user's text
    3. Converts response back to audio (TTS)

    Errors:
    - 400: unsupported content type, or file larger than the configured maximum
    - 500: any failure in the STT, chatbot or TTS steps
    """
    # Validate file type
    _require_supported_audio(file)

    try:
        logger.info("Voice chatbot request received for file: %s", file.filename)

        # Step 1: Convert audio to text (size is checked while reading)
        audio_bytes = await _read_audio_within_limit(file)
        user_text = await speech_to_text(audio_bytes, file.filename)
        logger.info("Step 1 - STT: %s", user_text)

        # Step 2: Generate chatbot response
        response_text = await get_chatbot_response(user_text)
        logger.info("Step 2 - Response: %s", response_text)

        # Step 3: Convert response to audio
        audio_response = await generate_tts(response_text)
        logger.info("Step 3 - TTS: Complete")

        return StreamingResponse(io.BytesIO(audio_response), media_type="audio/wav")
    except HTTPException:
        # Keep the 400 "too large" error from being rewritten as a 500.
        raise
    except Exception as exc:
        logger.exception("Voice chatbot error: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc


@router.post("/chatbot-text", response_model=ChatbotResponse)
async def chatbot_text(request: ChatbotRequest):
    """
    Chatbot interaction with text input/output (no audio).

    Example:
    - POST /audio/chatbot-text
    - Body: {"text": "What is the capital of France?"}
    - Returns: {"user_input": "What is...", "bot_response": "The capital...", ...}

    Errors:
    - 500: any failure while generating the response
    """
    try:
        logger.info("Text chatbot request: %s", request.text)
        response_text = await get_chatbot_response(request.text)
        return ChatbotResponse(
            user_input=request.text,
            bot_response=response_text,
            model_name=_MODEL_NAME,
        )
    except HTTPException:
        # Deliberate HTTP errors must reach the client with their own status.
        raise
    except Exception as exc:
        logger.exception("Text chatbot error: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc