from services.gemini_client import get_gemini_client
from google.genai import types
import base64
import logging
logger = logging.getLogger(__name__)
# Speech-generation model; the Gemini docs route TTS through dedicated
# "-tts" models rather than the general chat models.
_TTS_MODEL = "gemini-2.5-flash-preview-tts"
# Prebuilt voice requested from the service.
_TTS_VOICE = "Kore"
# PCM layout documented for Gemini TTS output: 24 kHz, mono, 16-bit samples.
# The sample rate is only a fallback; it is overridden when the response's
# MIME type carries an explicit ``rate=`` parameter.
_PCM_SAMPLE_RATE = 24000
_PCM_CHANNELS = 1
_PCM_SAMPLE_WIDTH = 2


def _sample_rate_from_mime(mime_type, default=_PCM_SAMPLE_RATE):
    """Return the ``rate=`` parameter of an audio MIME type, or *default*."""
    # Example input: "audio/L16;codec=pcm;rate=24000".
    for param in (mime_type or "").split(";")[1:]:
        key, _, value = param.strip().partition("=")
        if key.lower() == "rate" and value.isdigit():
            return int(value)
    return default


def _pcm_to_wav(pcm, sample_rate):
    """Wrap raw PCM frames in a RIFF/WAVE container and return the bytes."""
    import io
    import wave

    buffer = io.BytesIO()
    # ``wave`` does not close a file object it did not open itself, so the
    # buffer is still readable after the ``with`` block finalizes the header.
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setnchannels(_PCM_CHANNELS)
        wav_file.setsampwidth(_PCM_SAMPLE_WIDTH)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(pcm)
    return buffer.getvalue()


def _extract_wav_bytes(response):
    """Pull the first inline audio payload out of *response* as WAV bytes."""
    candidates = getattr(response, "candidates", None)
    if (
        not candidates
        or not candidates[0].content
        or not candidates[0].content.parts
    ):
        raise RuntimeError("Invalid response format from TTS service")

    for part in candidates[0].content.parts:
        inline = getattr(part, "inline_data", None)
        if inline is None or not inline.data:
            continue

        payload = inline.data
        if isinstance(payload, (bytes, bytearray, memoryview)):
            # The SDK already hands back raw bytes; decoding them again as
            # base64 would corrupt the audio or raise.
            audio = bytes(payload)
        else:
            # Defensive: a string payload is treated as base64 text.
            audio = base64.b64decode(payload)

        if audio[:4] == b"RIFF":
            # Already a WAV container; do not wrap it twice.
            return audio
        rate = _sample_rate_from_mime(getattr(inline, "mime_type", None))
        return _pcm_to_wav(audio, rate)

    raise RuntimeError("No audio data in response")


async def generate_tts(text: str) -> bytes:
    """
    Convert text to speech using the Gemini API.

    The request asks for the AUDIO response modality with a prebuilt voice.
    The blocking SDK call runs in a worker thread so the event loop stays
    responsive, and the returned audio is packaged as a WAV file.

    Args:
        text: Text to convert to speech. Must contain non-whitespace
            characters.

    Returns:
        Audio bytes in WAV format.

    Raises:
        ValueError: If ``text`` is not a string or is blank.
        RuntimeError: If TTS generation fails for any other reason. The
            original error is attached as ``__cause__``. Both exception
            types are ``Exception`` subclasses, so existing
            ``except Exception`` handlers keep working.
    """
    import asyncio

    # Reject unusable input up front instead of spending an API call on it.
    if not isinstance(text, str) or not text.strip():
        raise ValueError("text must be a non-empty string")

    try:
        client = get_gemini_client()
        logger.info(f"Generating speech for: '{text}'")

        config = types.GenerateContentConfig(
            # Audio is requested via response modalities;
            # ``response_mime_type`` only covers text/JSON style outputs.
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name=_TTS_VOICE,
                    ),
                ),
            ),
        )

        # ``generate_content`` is synchronous network I/O, so it runs in a
        # worker thread to avoid stalling other coroutines.
        response = await asyncio.to_thread(
            client.models.generate_content,
            model=_TTS_MODEL,
            contents=[f"Convert this to speech: {text}"],
            config=config,
        )

        audio_bytes = _extract_wav_bytes(response)
        logger.info(f"✓ TTS successful: {len(audio_bytes)} bytes generated")
        return audio_bytes
    except Exception as e:
        logger.error(f"✗ TTS failed: {str(e)}")
        # Chain the cause so the original traceback is preserved for callers.
        raise RuntimeError(
            f"Text-to-speech generation failed: {str(e)}"
        ) from e