malek-messaoudii
Refactor chatbot and STT services to improve model loading, response generation, and error handling; utilize Hugging Face API for STT functionality
e8aa76b
import logging
import os

import requests

logger = logging.getLogger(__name__)
async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
    """
    Convert audio bytes to text using the Hugging Face Inference API.
    No ffmpeg required!
    """
    try:
        logger.info("Converting audio to text using Hugging Face API")

        # Use the Hugging Face Inference API (no local model or ffmpeg needed)
        API_URL = "https://api-inference.huggingface.co/models/openai/whisper-medium"

        # On Hugging Face Spaces, public models may not require an API key
        headers = {}

        # Send the raw audio bytes directly to the Hugging Face API.
        # Note: requests.post is a blocking call inside an async function; it could
        # be wrapped in asyncio.to_thread (or swapped for an async HTTP client)
        # to avoid stalling the event loop.
        response = requests.post(API_URL, headers=headers, data=audio_bytes)

        if response.status_code == 200:
            result = response.json()
            transcribed_text = result.get("text", "").strip()
            if not transcribed_text:
                transcribed_text = "No speech detected in the audio."
            logger.info(f"✓ STT successful: '{transcribed_text}'")
            return transcribed_text
        else:
            # If the API call fails, fall back to the simpler handler
            error_msg = f"Hugging Face API error: {response.status_code}"
            logger.error(error_msg)
            return await fallback_stt(audio_bytes, filename)
    except Exception as e:
        logger.error(f"✗ STT failed: {str(e)}")
        return await fallback_stt(audio_bytes, filename)
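
# Optional sketch (not used by the service above): the public Inference API can
# rate-limit anonymous requests, in which case a Bearer token can be sent in the
# request headers. Reading the token from an HF_TOKEN environment variable is an
# assumption for illustration only; the service does not currently do this.
def build_hf_headers() -> dict:
    """Return request headers, adding an Authorization header if HF_TOKEN is set."""
    token = os.environ.get("HF_TOKEN")
    return {"Authorization": f"Bearer {token}"} if token else {}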

async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
    """Fallback STT using a simpler approach: acknowledge the upload without transcribing."""
    try:
        # Simple fallback that doesn't require ffmpeg: report basic file info
        file_size = len(audio_bytes)
        file_type = filename.split('.')[-1] if '.' in filename else 'unknown'
        return (
            f"Audio file '{filename}' ({file_type}, {file_size} bytes) received successfully. "
            "For full STT, please ensure ffmpeg is installed or use the Hugging Face API directly."
        )
    except Exception as e:
        logger.error(f"Fallback STT also failed: {str(e)}")
        return "Audio processing failed. Please try a different audio format or install ffmpeg."