Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

File size: 5,055 Bytes

from fastapi import APIRouter, UploadFile, File, HTTPException
from fastapi.responses import StreamingResponse
import io
import logging
from config import ALLOWED_AUDIO_TYPES, MAX_AUDIO_SIZE
from services.stt_service import speech_to_text
from services.tts_service import generate_tts
from services.chatbot_service import get_chatbot_response
from models.audio import STTResponse, TTSRequest, TTSResponse, ChatbotRequest, ChatbotResponse

# Module-level logger, named after this module so log records can be traced
# back to the audio routes.
logger = logging.getLogger(__name__)

# Router holding every endpoint defined below; all paths are prefixed with
# "/audio" and carry the "Audio" tag.
router = APIRouter(prefix="/audio", tags=["Audio"])


@router.post("/tts")
async def tts(request: TTSRequest):
    """
    Synthesize speech for the submitted text and stream the audio back.

    Example:
    - POST /audio/tts
    - Body: {"text": "Hello, welcome to our system"}
    - Returns: the generated audio, streamed with media type "audio/wav"

    Any failure during synthesis is logged and reported to the client as
    an HTTP 500 whose detail is the exception message.
    """
    try:
        text = request.text
        logger.info(f"TTS request received for text: '{text}'")

        # generate_tts yields the complete audio payload as bytes; wrap it in
        # an in-memory buffer so it can be handed to StreamingResponse.
        wav_payload = await generate_tts(text)
        stream = io.BytesIO(wav_payload)
        return StreamingResponse(stream, media_type="audio/wav")
    except Exception as exc:
        message = str(exc)
        logger.error(f"TTS error: {message}")
        raise HTTPException(status_code=500, detail=message)


@router.post("/stt", response_model=STTResponse)
async def stt(file: UploadFile = File(...)):
    """
    Convert an uploaded audio file to text.

    Example:
    - POST /audio/stt
    - File: audio.mp3 (or .wav, .m4a)
    - Returns: {"text": "transcribed text", "model_name": "gemini-2.5-flash", ...}

    Raises:
    - HTTP 400 if the upload's content type is not in ALLOWED_AUDIO_TYPES.
    - HTTP 400 if the upload is larger than MAX_AUDIO_SIZE bytes.
    - HTTP 500 for any unexpected failure while reading or transcribing.

    Note: model_name and language in the response are fixed labels set by
    this endpoint; duration_seconds is always None.
    """
    # Validate file type
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
        )

    try:
        logger.info("STT request received for file: %s", file.filename)
        audio_bytes = await file.read()

        # Check file size
        if len(audio_bytes) > MAX_AUDIO_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
            )

        text = await speech_to_text(audio_bytes, file.filename)

        return STTResponse(
            text=text,
            model_name="gemini-2.5-flash",
            language="en",
            duration_seconds=None
        )
    except HTTPException:
        # Deliberate client errors (such as the size check above) must keep
        # their own status code instead of being rewrapped as a 500.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception("STT error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post("/chatbot")
async def chatbot_voice(file: UploadFile = File(...)):
    """
    Full voice chatbot flow (Audio → Text → Response → Audio).

    Example:
    - POST /audio/chatbot
    - File: user_voice.mp3
    - Returns: response audio, streamed with media type "audio/wav"

    Process:
    1. Converts user's audio to text (STT)
    2. Generates chatbot response to user's text
    3. Converts response back to audio (TTS)

    Raises:
    - HTTP 400 if the upload's content type is not in ALLOWED_AUDIO_TYPES.
    - HTTP 400 if the upload is larger than MAX_AUDIO_SIZE bytes.
    - HTTP 500 for any unexpected failure in one of the three steps.
    """
    # Validate file type
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
        )

    try:
        logger.info("Voice chatbot request received for file: %s", file.filename)

        # Step 1: Convert audio to text
        audio_bytes = await file.read()

        # Check file size
        if len(audio_bytes) > MAX_AUDIO_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
            )

        user_text = await speech_to_text(audio_bytes, file.filename)
        logger.info("Step 1 - STT: %s", user_text)

        # Step 2: Generate chatbot response
        response_text = await get_chatbot_response(user_text)
        logger.info("Step 2 - Response: %s", response_text)

        # Step 3: Convert response to audio
        audio_response = await generate_tts(response_text)
        logger.info("Step 3 - TTS: Complete")

        return StreamingResponse(io.BytesIO(audio_response), media_type="audio/wav")

    except HTTPException:
        # Deliberate client errors (such as the size check above) must keep
        # their own status code instead of being rewrapped as a 500.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception("Voice chatbot error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post("/chatbot-text", response_model=ChatbotResponse)
async def chatbot_text(request: ChatbotRequest):
    """
    Text-only chatbot exchange: text in, text out, no audio involved.

    Example:
    - POST /audio/chatbot-text
    - Body: {"text": "What is the capital of France?"}
    - Returns: {"user_input": "What is...", "bot_response": "The capital...", ...}

    Any failure while generating the reply is logged and reported to the
    client as an HTTP 500 whose detail is the exception message. The
    model_name in the response is a fixed label set by this endpoint.
    """
    try:
        prompt = request.text
        logger.info(f"Text chatbot request: {prompt}")

        reply = await get_chatbot_response(prompt)

        payload = ChatbotResponse(
            user_input=request.text,
            bot_response=reply,
            model_name="gemini-2.5-flash"
        )
        return payload
    except Exception as exc:
        message = str(exc)
        logger.error(f"Text chatbot error: {message}")
        raise HTTPException(status_code=500, detail=message)