File size: 5,055 Bytes
9aa985d
 
 
4a13628
918acab
544d113
4a13628
 
 
 
 
c7fc3b6
 
 
218c6a3
73d4f3c
4a13628
d4b6133
4a13628
218c6a3
4a13628
 
 
 
 
9aa985d
4a13628
 
 
9aa985d
4a13628
9aa985d
c7fc3b6
 
4a13628
73d4f3c
520a06a
4a13628
 
 
 
 
 
520a06a
4a13628
520a06a
 
 
4a13628
520a06a
4a13628
9aa985d
4a13628
d4b6133
918acab
 
 
 
 
 
 
 
218c6a3
4a13628
 
 
 
 
 
 
9aa985d
4a13628
9aa985d
544d113
 
 
4a13628
d4b6133
4a13628
 
 
 
 
 
 
 
 
 
 
d4b6133
4a13628
d4b6133
 
 
4a13628
d4b6133
4a13628
544d113
4a13628
 
 
544d113
918acab
 
 
 
 
 
 
 
d4b6133
4a13628
 
 
 
 
 
 
544d113
4a13628
 
 
 
544d113
4a13628
544d113
 
4a13628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
from fastapi import APIRouter, UploadFile, File, HTTPException
from fastapi.responses import StreamingResponse
import io
import logging
from config import ALLOWED_AUDIO_TYPES, MAX_AUDIO_SIZE
from services.stt_service import speech_to_text
from services.tts_service import generate_tts
from services.chatbot_service import get_chatbot_response
from models.audio import STTResponse, TTSRequest, TTSResponse, ChatbotRequest, ChatbotResponse

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Every route registered on this router is served under the "/audio" URL
# prefix and tagged "Audio".
router = APIRouter(prefix="/audio", tags=["Audio"])


@router.post("/tts")
async def tts(request: TTSRequest):
    """
    Convert text to speech and stream the resulting audio back.

    Example:
    - POST /audio/tts
    - Body: {"text": "Hello, welcome to our system"}
    - Returns: audio stream served with media type audio/wav

    Args:
        request: Request body; only ``request.text`` is read.

    Returns:
        A StreamingResponse wrapping the bytes returned by ``generate_tts``,
        declared as ``audio/wav``.

    Raises:
        HTTPException: An HTTPException raised while handling the request is
            propagated unchanged; any other failure is reported as a 500
            whose detail is the exception message.
    """
    try:
        logger.info(f"TTS request received for text: '{request.text}'")
        audio_bytes = await generate_tts(request.text)
        return StreamingResponse(io.BytesIO(audio_bytes), media_type="audio/wav")
    except HTTPException:
        # HTTPException is a subclass of Exception: without this clause a
        # deliberate status code would be rewritten as a generic 500 below.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception("TTS error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post("/stt", response_model=STTResponse)
async def stt(file: UploadFile = File(...)):
    """
    Convert an uploaded audio file to text.

    Example:
    - POST /audio/stt
    - File: audio.mp3 (or .wav, .m4a)
    - Returns: {"text": "transcribed text", "model_name": "gemini-2.5-flash", ...}

    Args:
        file: Uploaded audio; its content type must be in
            ``ALLOWED_AUDIO_TYPES`` and its size at most ``MAX_AUDIO_SIZE``
            bytes.

    Returns:
        STTResponse carrying the transcription. ``model_name`` and
        ``language`` are fixed values set here, and ``duration_seconds`` is
        always None.

    Raises:
        HTTPException: 400 for an unsupported content type or an oversized
            file; 500 (detail = exception message) for any other failure.
    """
    # Validate file type
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
        )

    try:
        logger.info(f"STT request received for file: {file.filename}")
        audio_bytes = await file.read()

        # Check file size
        if len(audio_bytes) > MAX_AUDIO_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
            )

        text = await speech_to_text(audio_bytes, file.filename)

        return STTResponse(
            text=text,
            model_name="gemini-2.5-flash",
            language="en",
            duration_seconds=None
        )
    except HTTPException:
        # The size check above raises a 400 inside this try block. Since
        # HTTPException subclasses Exception, it must be re-raised here or
        # the generic handler below would turn it into a 500.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception("STT error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post("/chatbot")
async def chatbot_voice(file: UploadFile = File(...)):
    """
    Full voice chatbot flow (Audio → Text → Response → Audio).

    Example:
    - POST /audio/chatbot
    - File: user_voice.mp3
    - Returns: response audio served with media type audio/wav

    Process:
    1. Converts user's audio to text (STT)
    2. Generates chatbot response to user's text
    3. Converts response back to audio (TTS)

    Args:
        file: Uploaded audio; its content type must be in
            ``ALLOWED_AUDIO_TYPES`` and its size at most ``MAX_AUDIO_SIZE``
            bytes.

    Returns:
        A StreamingResponse wrapping the bytes returned by ``generate_tts``,
        declared as ``audio/wav``.

    Raises:
        HTTPException: 400 for an unsupported content type or an oversized
            file; 500 (detail = exception message) for any other failure.
    """
    # Validate file type
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
        )

    try:
        logger.info(f"Voice chatbot request received for file: {file.filename}")

        # Step 1: Convert audio to text
        audio_bytes = await file.read()

        # Check file size
        if len(audio_bytes) > MAX_AUDIO_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
            )

        user_text = await speech_to_text(audio_bytes, file.filename)
        logger.info(f"Step 1 - STT: {user_text}")

        # Step 2: Generate chatbot response
        response_text = await get_chatbot_response(user_text)
        logger.info(f"Step 2 - Response: {response_text}")

        # Step 3: Convert response to audio
        audio_response = await generate_tts(response_text)
        logger.info("Step 3 - TTS: Complete")

        return StreamingResponse(io.BytesIO(audio_response), media_type="audio/wav")

    except HTTPException:
        # The size check above raises a 400 inside this try block. Since
        # HTTPException subclasses Exception, it must be re-raised here or
        # the generic handler below would turn it into a 500.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception("Voice chatbot error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post("/chatbot-text", response_model=ChatbotResponse)
async def chatbot_text(request: ChatbotRequest):
    """
    Chatbot interaction with text input/output (no audio).

    Example:
    - POST /audio/chatbot-text
    - Body: {"text": "What is the capital of France?"}
    - Returns: {"user_input": "What is...", "bot_response": "The capital...", ...}

    Args:
        request: Request body; only ``request.text`` is read.

    Returns:
        ChatbotResponse echoing the input text alongside the reply returned
        by ``get_chatbot_response``. ``model_name`` is a fixed value set
        here.

    Raises:
        HTTPException: An HTTPException raised while handling the request is
            propagated unchanged; any other failure is reported as a 500
            whose detail is the exception message.
    """
    try:
        logger.info(f"Text chatbot request: {request.text}")
        response_text = await get_chatbot_response(request.text)

        return ChatbotResponse(
            user_input=request.text,
            bot_response=response_text,
            model_name="gemini-2.5-flash"
        )
    except HTTPException:
        # HTTPException is a subclass of Exception: without this clause a
        # deliberate status code would be rewritten as a generic 500 below.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception("Text chatbot error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e