malek-messaoudii
Refactor chatbot and STT services to improve model loading, response generation, and error handling; utilize Hugging Face API for STT functionality
e8aa76b
import logging
import os

import requests

logger = logging.getLogger(__name__)
async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
    """
    Convert audio bytes to text using the Hugging Face Inference API.
    No ffmpeg required!
    """
    try:
        logger.info("Converting audio to text using Hugging Face API")

        # Use the Hugging Face Inference API (no local model or ffmpeg needed)
        API_URL = "https://api-inference.huggingface.co/models/openai/whisper-medium"

        # On Hugging Face Spaces, public models may not require an API key
        headers = {}

        # Send the raw audio bytes directly to the Hugging Face API.
        # Note: requests.post is a blocking call inside an async function; it could
        # be wrapped in asyncio.to_thread (or swapped for an async HTTP client)
        # to avoid stalling the event loop.
        response = requests.post(API_URL, headers=headers, data=audio_bytes)

        if response.status_code == 200:
            result = response.json()
            transcribed_text = result.get("text", "").strip()
            if not transcribed_text:
                transcribed_text = "No speech detected in the audio."
            logger.info(f"✓ STT successful: '{transcribed_text}'")
            return transcribed_text
        else:
            # If the API call fails, fall back to the simpler handler
            error_msg = f"Hugging Face API error: {response.status_code}"
            logger.error(error_msg)
            return await fallback_stt(audio_bytes, filename)
    except Exception as e:
        logger.error(f"✗ STT failed: {str(e)}")
        return await fallback_stt(audio_bytes, filename)
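
# Optional sketch (not used by the service above): the public Inference API can
# rate-limit anonymous requests, in which case a Bearer token can be sent in the
# request headers. Reading the token from an HF_TOKEN environment variable is an
# assumption for illustration only; the service does not currently do this.
def build_hf_headers() -> dict:
    """Return request headers, adding an Authorization header if HF_TOKEN is set."""
    token = os.environ.get("HF_TOKEN")
    return {"Authorization": f"Bearer {token}"} if token else {}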

async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
    """Fallback STT using a simpler approach: acknowledge the upload without transcribing."""
    try:
        # Simple fallback that doesn't require ffmpeg: report basic file info
        file_size = len(audio_bytes)
        file_type = filename.split('.')[-1] if '.' in filename else 'unknown'
        return (
            f"Audio file '{filename}' ({file_type}, {file_size} bytes) received successfully. "
            "For full STT, please ensure ffmpeg is installed or use the Hugging Face API directly."
        )
    except Exception as e:
        logger.error(f"Fallback STT also failed: {str(e)}")
        return "Audio processing failed. Please try a different audio format or install ffmpeg."