malek-messaoudii
Enhance configuration validation and audio processing limits
918acab
raw
history blame
2.21 kB
import asyncio
import base64
import logging

from google.genai import types

from services.gemini_client import get_gemini_client
logger = logging.getLogger(__name__)
def _extract_audio_bytes(response) -> bytes:
    """Return the first inline audio payload found in a Gemini response.

    Args:
        response: Object returned by ``client.models.generate_content``.

    Returns:
        The raw bytes of the first part that carries ``inline_data``.

    Raises:
        ValueError: If the response has no candidates/parts, or if none of
            the parts contains inline data.
    """
    candidates = getattr(response, "candidates", None)
    if not candidates:
        raise ValueError("Invalid response format from TTS service")

    content = candidates[0].content
    parts = content.parts if content else None
    if not parts:
        raise ValueError("Invalid response format from TTS service")

    # The audio is not guaranteed to be the first part (a text part may
    # precede it), so scan every part instead of looking only at parts[0].
    for part in parts:
        blob = getattr(part, "inline_data", None)
        payload = getattr(blob, "data", None) if blob else None
        if not payload:
            continue
        # The google-genai SDK exposes Blob.data as already-decoded bytes;
        # base64-decoding it again would corrupt the audio or raise.
        # Only decode when the payload really is base64 text.
        if isinstance(payload, str):
            return base64.b64decode(payload)
        return bytes(payload)

    raise ValueError("No audio data in response")


async def generate_tts(text: str) -> bytes:
    """
    Convert text to speech using the Gemini API.

    The blocking SDK call is run in a worker thread so that awaiting this
    coroutine does not stall the event loop.

    Args:
        text: Text to convert to speech. Must not be empty or whitespace-only.

    Returns:
        Audio bytes taken from the response's inline data. The request asks
        for ``audio/wav``; the actual container/encoding returned by the
        model is not verified here -- TODO confirm against the Gemini TTS API.

    Raises:
        Exception: If TTS generation fails for any reason (invalid input,
            API error, or a response without audio). The original error is
            attached as ``__cause__``.
    """
    try:
        if not text or not text.strip():
            raise ValueError("Text must be a non-empty string")

        client = get_gemini_client()
        logger.info("Generating speech for: '%s'", text)

        # NOTE(review): this is the temporary fallback from the original
        # implementation (regular model + audio mime type). The dedicated
        # Gemini TTS models may require a different model name and config
        # (response modalities / speech config) -- confirm before relying on it.
        response = await asyncio.to_thread(
            client.models.generate_content,
            model="gemini-2.0-flash-exp",
            contents=[f"Convert this to speech: {text}"],
            config=types.GenerateContentConfig(
                response_mime_type="audio/wav",
            ),
        )

        audio_bytes = _extract_audio_bytes(response)
        logger.info("✓ TTS successful: %d bytes generated", len(audio_bytes))
        return audio_bytes
    except Exception as e:
        logger.error("✗ TTS failed: %s", e)
        # Keep the generic Exception type callers already catch, but chain
        # the cause so the original traceback is not lost.
        raise Exception(f"Text-to-speech generation failed: {e}") from e