Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

FastAPI-Backend-Models / routes /audio.py

malek-messaoudii

Enhance configuration validation and audio processing limits

918acab 27 days ago

5.06 kB

	from fastapi import APIRouter, UploadFile, File, HTTPException
	from fastapi.responses import StreamingResponse
	import io
	import logging
	from config import ALLOWED_AUDIO_TYPES, MAX_AUDIO_SIZE
	from services.stt_service import speech_to_text
	from services.tts_service import generate_tts
	from services.chatbot_service import get_chatbot_response
	from models.audio import STTResponse, TTSRequest, TTSResponse, ChatbotRequest, ChatbotResponse

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)

	# Router for the endpoints defined in this file: every route registered on
	# it is served under the "/audio" prefix and carries the "Audio" tag.
	router = APIRouter(prefix="/audio", tags=["Audio"])


	@router.post("/tts")
	async def tts(request: TTSRequest):
	    """
	    Synthesize speech from the submitted text and stream it back.

	    Example:
	    - POST /audio/tts
	    - Body: {"text": "Hello, welcome to our system"}
	    - Returns: WAV audio file

	    Errors:
	    - 500: any failure while generating the audio; the response detail
	      carries the exception text
	    """
	    try:
	        spoken_text = request.text
	        logger.info(f"TTS request received for text: '{spoken_text}'")

	        # generate_tts hands back the raw audio bytes; wrap them in an
	        # in-memory stream so they can be returned as a streamed response.
	        wav_payload = await generate_tts(spoken_text)
	        wav_stream = io.BytesIO(wav_payload)
	        return StreamingResponse(wav_stream, media_type="audio/wav")
	    except Exception as e:
	        reason = str(e)
	        logger.error(f"TTS error: {reason}")
	        raise HTTPException(status_code=500, detail=reason)


	@router.post("/stt", response_model=STTResponse)
	async def stt(file: UploadFile = File(...)):
	    """
	    Convert audio file to text.

	    Example:
	    - POST /audio/stt
	    - File: audio.mp3 (or .wav, .m4a)
	    - Returns: {"text": "transcribed text", "model_name": "gemini-2.5-flash", ...}

	    Errors:
	    - 400: the upload's content type is not in ALLOWED_AUDIO_TYPES, or the
	      file is larger than MAX_AUDIO_SIZE bytes
	    - 500: any other failure while reading or transcribing the audio; the
	      response detail carries the exception text
	    """
	    # Validate file type
	    if file.content_type not in ALLOWED_AUDIO_TYPES:
	        raise HTTPException(
	            status_code=400,
	            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
	        )

	    try:
	        logger.info(f"STT request received for file: {file.filename}")
	        audio_bytes = await file.read()

	        # Check file size
	        if len(audio_bytes) > MAX_AUDIO_SIZE:
	            raise HTTPException(
	                status_code=400,
	                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
	            )

	        text = await speech_to_text(audio_bytes, file.filename)

	        # NOTE(review): model_name and language are hard-coded here rather
	        # than reported by speech_to_text - confirm they match the service.
	        return STTResponse(
	            text=text,
	            model_name="gemini-2.5-flash",
	            language="en",
	            duration_seconds=None
	        )
	    except HTTPException:
	        # HTTPException is a subclass of Exception, so without this clause
	        # the 400 size rejection raised above was caught by the generic
	        # handler below and reached the client as a 500. Let deliberate
	        # HTTP errors through unchanged.
	        raise
	    except Exception as e:
	        # logger.exception logs at ERROR level and attaches the traceback.
	        logger.exception(f"STT error: {str(e)}")
	        raise HTTPException(status_code=500, detail=str(e)) from e


	@router.post("/chatbot")
	async def chatbot_voice(file: UploadFile = File(...)):
	    """
	    Full voice chatbot flow (Audio → Text → Response → Audio).

	    Example:
	    - POST /audio/chatbot
	    - File: user_voice.mp3
	    - Returns: Response audio file (WAV)

	    Process:
	    1. Converts user's audio to text (STT)
	    2. Generates chatbot response to user's text
	    3. Converts response back to audio (TTS)

	    Errors:
	    - 400: the upload's content type is not in ALLOWED_AUDIO_TYPES, or the
	      file is larger than MAX_AUDIO_SIZE bytes
	    - 500: any other failure in one of the three steps; the response
	      detail carries the exception text
	    """
	    # Validate file type
	    if file.content_type not in ALLOWED_AUDIO_TYPES:
	        raise HTTPException(
	            status_code=400,
	            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
	        )

	    try:
	        logger.info(f"Voice chatbot request received for file: {file.filename}")

	        # Step 1: Convert audio to text
	        audio_bytes = await file.read()

	        # Check file size
	        if len(audio_bytes) > MAX_AUDIO_SIZE:
	            raise HTTPException(
	                status_code=400,
	                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
	            )

	        user_text = await speech_to_text(audio_bytes, file.filename)
	        logger.info(f"Step 1 - STT: {user_text}")

	        # Step 2: Generate chatbot response
	        response_text = await get_chatbot_response(user_text)
	        logger.info(f"Step 2 - Response: {response_text}")

	        # Step 3: Convert response to audio
	        audio_response = await generate_tts(response_text)
	        logger.info("Step 3 - TTS: Complete")

	        return StreamingResponse(io.BytesIO(audio_response), media_type="audio/wav")

	    except HTTPException:
	        # HTTPException is a subclass of Exception, so without this clause
	        # the 400 size rejection raised above was caught by the generic
	        # handler below and reached the client as a 500. Let deliberate
	        # HTTP errors through unchanged.
	        raise
	    except Exception as e:
	        # logger.exception logs at ERROR level and attaches the traceback.
	        logger.exception(f"Voice chatbot error: {str(e)}")
	        raise HTTPException(status_code=500, detail=str(e)) from e


	@router.post("/chatbot-text", response_model=ChatbotResponse)
	async def chatbot_text(request: ChatbotRequest):
	    """
	    Text-only chatbot exchange: text in, text out, no audio involved.

	    Example:
	    - POST /audio/chatbot-text
	    - Body: {"text": "What is the capital of France?"}
	    - Returns: {"user_input": "What is...", "bot_response": "The capital...", ...}

	    Errors:
	    - 500: any failure while generating the reply; the response detail
	      carries the exception text
	    """
	    try:
	        prompt = request.text
	        logger.info(f"Text chatbot request: {prompt}")

	        reply = await get_chatbot_response(prompt)

	        # Echo the user's input alongside the generated reply.
	        payload = ChatbotResponse(
	            user_input=prompt,
	            bot_response=reply,
	            model_name="gemini-2.5-flash",
	        )
	        return payload
	    except Exception as e:
	        reason = str(e)
	        logger.error(f"Text chatbot error: {reason}")
	        raise HTTPException(status_code=500, detail=reason)