|
|
from fastapi import APIRouter, UploadFile, File, HTTPException |
|
|
from fastapi.responses import StreamingResponse |
|
|
import io |
|
|
import logging |
|
|
from config import ALLOWED_AUDIO_TYPES, MAX_AUDIO_SIZE |
|
|
from services.stt_service import speech_to_text |
|
|
from services.tts_service import generate_tts |
|
|
from services.chatbot_service import get_chatbot_response |
|
|
from models.audio import STTResponse, TTSRequest, TTSResponse, ChatbotRequest, ChatbotResponse |
|
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Router that groups the audio endpoints defined in this module; each route
# registered on it is served under the "/audio" prefix and tagged "Audio".
router = APIRouter(prefix="/audio", tags=["Audio"])
|
|
|
|
|
|
|
|
@router.post("/tts")
async def tts(request: TTSRequest):
    """
    Synthesize speech for the submitted text and stream the audio back.

    Example:
    - POST /audio/tts
    - Body: {"text": "Hello, welcome to our system"}
    - Returns: audio stream served with media type "audio/wav"

    Any exception raised while handling the request is logged and reported
    to the client as HTTP 500, with the exception message as the detail.
    """
    try:
        prompt = request.text
        logger.info(f"TTS request received for text: '{prompt}'")

        synthesized = await generate_tts(prompt)

        # Wrap the raw bytes in a file-like object so they can be streamed.
        payload = io.BytesIO(synthesized)
        return StreamingResponse(payload, media_type="audio/wav")
    except Exception as err:
        reason = str(err)
        logger.error(f"TTS error: {reason}")
        raise HTTPException(status_code=500, detail=reason)
|
|
|
|
|
|
|
|
@router.post("/stt", response_model=STTResponse)
async def stt(file: UploadFile = File(...)):
    """
    Convert audio file to text.

    Example:
    - POST /audio/stt
    - File: audio.mp3 (or .wav, .m4a)
    - Returns: {"text": "transcribed text", "model_name": "gemini-2.5-flash", ...}

    Notes:
    - `model_name`, `language` and `duration_seconds` in the response are
      fixed values set by this endpoint, not values reported by the
      speech-to-text service.

    Raises:
    - HTTPException 400: the upload's content type is not in
      ALLOWED_AUDIO_TYPES, or the file is larger than MAX_AUDIO_SIZE bytes.
    - HTTPException 500: any other failure while reading or transcribing
      the upload; the exception message is returned as the detail.
    """
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
        )

    try:
        logger.info(f"STT request received for file: {file.filename}")
        audio_bytes = await file.read()

        # The size can only be checked after the upload has been read.
        if len(audio_bytes) > MAX_AUDIO_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
            )

        text = await speech_to_text(audio_bytes, file.filename)

        return STTResponse(
            text=text,
            model_name="gemini-2.5-flash",
            language="en",
            duration_seconds=None
        )
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 size check above) must reach the
        # client with their own status code instead of being rewritten to 500.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception(f"STT error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
|
|
|
|
@router.post("/chatbot")
async def chatbot_voice(file: UploadFile = File(...)):
    """
    Full voice chatbot flow (Audio → Text → Response → Audio).

    Example:
    - POST /audio/chatbot
    - File: user_voice.mp3
    - Returns: response audio streamed with media type "audio/wav"

    Process:
    1. Converts user's audio to text (STT)
    2. Generates chatbot response to user's text
    3. Converts response back to audio (TTS)

    Raises:
    - HTTPException 400: the upload's content type is not in
      ALLOWED_AUDIO_TYPES, or the file is larger than MAX_AUDIO_SIZE bytes.
    - HTTPException 500: any other failure in the three steps above; the
      exception message is returned as the detail.
    """
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
        )

    try:
        logger.info(f"Voice chatbot request received for file: {file.filename}")

        audio_bytes = await file.read()

        # The size can only be checked after the upload has been read.
        if len(audio_bytes) > MAX_AUDIO_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
            )

        # Step 1: speech-to-text on the uploaded audio.
        user_text = await speech_to_text(audio_bytes, file.filename)
        logger.info(f"Step 1 - STT: {user_text}")

        # Step 2: chatbot reply for the transcribed text.
        response_text = await get_chatbot_response(user_text)
        logger.info(f"Step 2 - Response: {response_text}")

        # Step 3: text-to-speech on the reply.
        audio_response = await generate_tts(response_text)
        logger.info("Step 3 - TTS: Complete")

        return StreamingResponse(io.BytesIO(audio_response), media_type="audio/wav")

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 size check above) must reach the
        # client with their own status code instead of being rewritten to 500.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception(f"Voice chatbot error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
|
|
|
|
@router.post("/chatbot-text", response_model=ChatbotResponse)
async def chatbot_text(request: ChatbotRequest):
    """
    Text-only chatbot exchange: text in, text out, no audio involved.

    Example:
    - POST /audio/chatbot-text
    - Body: {"text": "What is the capital of France?"}
    - Returns: {"user_input": "What is...", "bot_response": "The capital...", ...}

    The `model_name` field of the response is a fixed value set here. Any
    exception raised while handling the request is logged and reported to
    the client as HTTP 500, with the exception message as the detail.
    """
    try:
        logger.info(f"Text chatbot request: {request.text}")

        reply = await get_chatbot_response(request.text)

        # Echo the user's input alongside the generated reply.
        result = ChatbotResponse(
            user_input=request.text,
            bot_response=reply,
            model_name="gemini-2.5-flash",
        )
        return result
    except Exception as err:
        reason = str(err)
        logger.error(f"Text chatbot error: {reason}")
        raise HTTPException(status_code=500, detail=reason)