Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

FastAPI-Backend-Models / services /tts_service.py

malek-messaoudii

Enhance configuration validation and audio processing limits

918acab 29 days ago

2.21 kB

	from services.gemini_client import get_gemini_client
	from google.genai import types
	import base64
	import logging

	logger = logging.getLogger(__name__)


	async def generate_tts(text: str) -> bytes:
	    """
	    Convert text to speech using the Gemini API.

	    Args:
	        text: Text to convert to speech.

	    Returns:
	        The audio payload of the first response part that carries inline
	        data. The request asks for "audio/wav"; the actual container format
	        is whatever the service returns and is not verified here.

	    Raises:
	        Exception: If the request fails or the response holds no audio.
	            The original error is attached as ``__cause__``.
	    """
	    # Local import keeps this block self-contained; asyncio.to_thread needs
	    # Python 3.9+, which the google-genai SDK already requires.
	    import asyncio

	    try:
	        client = get_gemini_client()

	        logger.info(f"Generating speech for: '{text}'")

	        # NOTE(review): the official Gemini speech-generation docs use a
	        # dedicated TTS model with response_modalities=["AUDIO"] and a
	        # speech_config, and return raw PCM. Confirm that
	        # response_mime_type="audio/wav" is accepted by this model; the
	        # request below is kept exactly as it was.
	        #
	        # generate_content is a blocking call, so it is pushed to a worker
	        # thread instead of stalling the event loop for the whole request.
	        response = await asyncio.to_thread(
	            client.models.generate_content,
	            model="gemini-2.0-flash-exp",
	            contents=[f"Convert this to speech: {text}"],
	            config=types.GenerateContentConfig(
	                response_mime_type="audio/wav",
	            ),
	        )

	        audio_bytes = _extract_audio_bytes(response)

	        logger.info(f"✓ TTS successful: {len(audio_bytes)} bytes generated")

	        return audio_bytes

	    except Exception as e:
	        logger.error(f"✗ TTS failed: {str(e)}")
	        # Re-raise with a stable message; chaining keeps the root cause.
	        raise Exception(f"Text-to-speech generation failed: {str(e)}") from e


	def _extract_audio_bytes(response) -> bytes:
	    """Return the inline audio payload of the first part that carries one."""
	    candidates = getattr(response, "candidates", None)
	    if not candidates:
	        raise Exception("Invalid response format from TTS service")

	    content = getattr(candidates[0], "content", None)
	    parts = getattr(content, "parts", None) if content else None
	    if not parts:
	        raise Exception("Invalid response format from TTS service")

	    # A text part may precede the audio part, so scan every part rather
	    # than looking only at the first one.
	    for part in parts:
	        inline = getattr(part, "inline_data", None)
	        data = getattr(inline, "data", None) if inline else None
	        if not data:
	            continue
	        # The SDK normally exposes inline data as already-decoded bytes;
	        # base64-decoding those again corrupts the audio. Decode only when
	        # the payload is still a base64 string.
	        if isinstance(data, str):
	            return base64.b64decode(data)
	        return bytes(data)

	    raise Exception("No audio data in response")