import asyncio
import functools
import logging
import os
import tempfile

import requests

logger = logging.getLogger(__name__)

# Upper bound (seconds) on the Hugging Face HTTP call. Without a timeout,
# ``requests`` waits forever on a stalled connection.
_HF_REQUEST_TIMEOUT_SECONDS = 60


async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
    """Convert audio bytes to text using the Hugging Face Inference API.

    The raw bytes are POSTed to the hosted ``openai/whisper-medium`` model,
    so no local ffmpeg installation is needed.

    Args:
        audio_bytes: Raw contents of the audio file.
        filename: Original file name; only used by the fallback message.

    Returns:
        The transcribed text, ``"No speech detected in the audio."`` when the
        API returns an empty transcription, or the message produced by
        ``fallback_stt`` when the request fails, times out, or returns a
        non-200 status.
    """
    try:
        logger.info("Converting audio to text using Hugging Face API")

        # Use Hugging Face Inference API (free, no ffmpeg needed)
        API_URL = "https://api-inference.huggingface.co/models/openai/whisper-medium"

        # For Hugging Face Spaces, you might not need an API key for public models
        headers = {}

        # ``requests`` is synchronous: run it in the default executor so the
        # event loop keeps serving other tasks while the upload is in flight.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            functools.partial(
                requests.post,
                API_URL,
                headers=headers,
                data=audio_bytes,
                timeout=_HF_REQUEST_TIMEOUT_SECONDS,
            ),
        )

        if response.status_code != 200:
            # If API fails, use fallback
            error_msg = f"Hugging Face API error: {response.status_code}"
            logger.error(error_msg)
            return await fallback_stt(audio_bytes, filename)

        result = response.json()
        # ``or ""`` also covers a JSON ``"text": null`` payload.
        transcribed_text = (result.get("text") or "").strip()

        if not transcribed_text:
            transcribed_text = "No speech detected in the audio."

        logger.info(f"✓ STT successful: '{transcribed_text}'")
        return transcribed_text

    except Exception as e:
        # Deliberate best-effort boundary: any failure (network error,
        # timeout, malformed JSON) degrades to the fallback message.
        logger.error(f"✗ STT failed: {str(e)}")
        return await fallback_stt(audio_bytes, filename)


async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
    """Return a descriptive placeholder message when real STT is unavailable.

    No transcription is performed; the message only reports the file name,
    the extension taken from the name, and the payload size in bytes.

    Args:
        audio_bytes: Raw contents of the audio file (only its length is used).
        filename: Original file name, used for display and extension lookup.

    Returns:
        A human-readable status string, or a generic failure message if
        building that string raises.
    """
    try:
        # Simple fallback that doesn't require ffmpeg
        file_size = len(audio_bytes)
        file_type = filename.split('.')[-1] if '.' in filename else 'unknown'

        return f"Audio file '{filename}' ({file_type}, {file_size} bytes) received successfully. For full STT, please ensure ffmpeg is installed or use the Hugging Face API directly."

    except Exception as e:
        logger.error(f"Fallback STT also failed: {str(e)}")
        return "Audio processing failed. Please try a different audio format or install ffmpeg."