File size: 5,055 Bytes
9aa985d 4a13628 918acab 544d113 4a13628 c7fc3b6 218c6a3 73d4f3c 4a13628 d4b6133 4a13628 218c6a3 4a13628 9aa985d 4a13628 9aa985d 4a13628 9aa985d c7fc3b6 4a13628 73d4f3c 520a06a 4a13628 520a06a 4a13628 520a06a 4a13628 520a06a 4a13628 9aa985d 4a13628 d4b6133 918acab 218c6a3 4a13628 9aa985d 4a13628 9aa985d 544d113 4a13628 d4b6133 4a13628 d4b6133 4a13628 d4b6133 4a13628 d4b6133 4a13628 544d113 4a13628 544d113 918acab d4b6133 4a13628 544d113 4a13628 544d113 4a13628 544d113 4a13628 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
from fastapi import APIRouter, UploadFile, File, HTTPException
from fastapi.responses import StreamingResponse
import io
import logging
from config import ALLOWED_AUDIO_TYPES, MAX_AUDIO_SIZE
from services.stt_service import speech_to_text
from services.tts_service import generate_tts
from services.chatbot_service import get_chatbot_response
from models.audio import STTResponse, TTSRequest, TTSResponse, ChatbotRequest, ChatbotResponse
# Module-level logger, named after this module so log records can be traced back to it.
logger = logging.getLogger(__name__)
# Router for the endpoints defined below: every path is prefixed with "/audio"
# and carries the "Audio" tag.
router = APIRouter(prefix="/audio", tags=["Audio"])
@router.post("/tts")
async def tts(request: TTSRequest):
    """
    Synthesize speech for the submitted text and stream the audio back.

    Example:
    - POST /audio/tts
    - Body: {"text": "Hello, welcome to our system"}
    - Returns: audio stream served with media type audio/wav

    Any exception raised while handling the request is logged and reported
    to the client as HTTP 500, with the exception message as the detail.
    """
    try:
        text = request.text
        logger.info(f"TTS request received for text: '{text}'")

        # The TTS service hands back the complete audio payload as bytes;
        # wrap it in an in-memory buffer so it can be streamed to the client.
        wav_payload = await generate_tts(text)
        stream = io.BytesIO(wav_payload)
        return StreamingResponse(stream, media_type="audio/wav")
    except Exception as exc:
        message = str(exc)
        logger.error(f"TTS error: {message}")
        raise HTTPException(status_code=500, detail=message)
@router.post("/stt", response_model=STTResponse)
async def stt(file: UploadFile = File(...)):
    """
    Convert an uploaded audio file to text.

    Example:
    - POST /audio/stt
    - File: audio.mp3 (or .wav, .m4a)
    - Returns: {"text": "transcribed text", "model_name": "gemini-2.5-flash", ...}

    Raises:
    - HTTPException 400: the content type is not in ALLOWED_AUDIO_TYPES, or
      the upload is larger than MAX_AUDIO_SIZE bytes
    - HTTPException 500: any other failure while reading or transcribing
      the file (the exception message is returned as the detail)
    """
    # Validate file type
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
        )

    try:
        logger.info(f"STT request received for file: {file.filename}")
        audio_bytes = await file.read()

        # Check file size
        if len(audio_bytes) > MAX_AUDIO_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
            )

        text = await speech_to_text(audio_bytes, file.filename)

        # model_name and language are fixed values here; they are not derived
        # from the audio, and no duration is computed.
        return STTResponse(
            text=text,
            model_name="gemini-2.5-flash",
            language="en",
            duration_seconds=None
        )
    except HTTPException:
        # Deliberate HTTP errors (such as the 400 size check above) must reach
        # the client unchanged; without this clause the generic handler below
        # would catch them and turn them into a 500.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"STT error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/chatbot")
async def chatbot_voice(file: UploadFile = File(...)):
    """
    Full voice chatbot flow (Audio → Text → Response → Audio).

    Example:
    - POST /audio/chatbot
    - File: user_voice.mp3
    - Returns: Response audio served with media type audio/wav

    Process:
    1. Converts user's audio to text (STT)
    2. Generates chatbot response to user's text
    3. Converts response back to audio (TTS)

    Raises:
    - HTTPException 400: the content type is not in ALLOWED_AUDIO_TYPES, or
      the upload is larger than MAX_AUDIO_SIZE bytes
    - HTTPException 500: any other failure in one of the three steps (the
      exception message is returned as the detail)
    """
    # Validate file type
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
        )

    try:
        logger.info(f"Voice chatbot request received for file: {file.filename}")

        # Step 1: Convert audio to text
        audio_bytes = await file.read()

        # Check file size
        if len(audio_bytes) > MAX_AUDIO_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
            )

        user_text = await speech_to_text(audio_bytes, file.filename)
        logger.info(f"Step 1 - STT: {user_text}")

        # Step 2: Generate chatbot response
        response_text = await get_chatbot_response(user_text)
        logger.info(f"Step 2 - Response: {response_text}")

        # Step 3: Convert response to audio
        audio_response = await generate_tts(response_text)
        logger.info("Step 3 - TTS: Complete")

        return StreamingResponse(io.BytesIO(audio_response), media_type="audio/wav")
    except HTTPException:
        # Deliberate HTTP errors (such as the 400 size check above) must reach
        # the client unchanged; without this clause the generic handler below
        # would catch them and turn them into a 500.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"Voice chatbot error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/chatbot-text", response_model=ChatbotResponse)
async def chatbot_text(request: ChatbotRequest):
    """
    Text-only chatbot exchange: text in, text out, no audio involved.

    Example:
    - POST /audio/chatbot-text
    - Body: {"text": "What is the capital of France?"}
    - Returns: {"user_input": "What is...", "bot_response": "The capital...", ...}

    Any exception raised while handling the request is logged and reported
    to the client as HTTP 500, with the exception message as the detail.
    """
    try:
        prompt = request.text
        logger.info(f"Text chatbot request: {prompt}")

        reply = await get_chatbot_response(prompt)

        # Echo the user's input alongside the reply; model_name is a fixed value.
        payload = ChatbotResponse(
            user_input=prompt,
            bot_response=reply,
            model_name="gemini-2.5-flash",
        )
        return payload
    except Exception as exc:
        message = str(exc)
        logger.error(f"Text chatbot error: {message}")
        raise HTTPException(status_code=500, detail=message)