""" Main entry point for the Audio Translation Web Application Handles file upload, processing pipeline, and UI rendering using DDD architecture with Gradio """ import logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[ logging.FileHandler("app.log"), logging.StreamHandler() ] ) logger = logging.getLogger(__name__) import gradio as gr import os import json from typing import Optional, Tuple, Dict, Any # Import application services and DTOs from src.application.services.audio_processing_service import AudioProcessingApplicationService from src.application.services.configuration_service import ConfigurationApplicationService from src.application.dtos.audio_upload_dto import AudioUploadDto from src.application.dtos.processing_request_dto import ProcessingRequestDto from src.application.dtos.processing_result_dto import ProcessingResultDto # Import infrastructure setup from src.infrastructure.config.container_setup import initialize_global_container, get_global_container # Initialize environment configurations os.makedirs("temp/uploads", exist_ok=True) os.makedirs("temp/outputs", exist_ok=True) # Global container initialization container_initialized = False def initialize_application(): """Initialize the application with dependency injection container""" global container_initialized if not container_initialized: try: logger.info("Initializing application container") initialize_global_container() container_initialized = True logger.info("Application container initialized successfully") except Exception as e: logger.error(f"Failed to initialize application: {e}") raise RuntimeError(f"Application initialization failed: {str(e)}") def create_audio_upload_dto(audio_file_path: str) -> AudioUploadDto: """ Create AudioUploadDto from audio file path. Args: audio_file_path: Path to the uploaded audio file Returns: AudioUploadDto: DTO containing upload information """ try: if not audio_file_path or not os.path.exists(audio_file_path): raise ValueError("No audio file provided or file does not exist") filename = os.path.basename(audio_file_path) logger.info(f"Creating AudioUploadDto for file: {filename}") logger.info(f"Full file path: {audio_file_path}") with open(audio_file_path, 'rb') as f: content = f.read() # Determine content type based on file extension file_ext = os.path.splitext(filename.lower())[1] logger.info(f"Detected file extension: {file_ext}") content_type_map = { '.wav': 'audio/wav', '.mp3': 'audio/mpeg', '.m4a': 'audio/mp4', '.flac': 'audio/flac', '.ogg': 'audio/ogg' } content_type = content_type_map.get(file_ext, 'audio/wav') logger.info(f"Mapped content type: {content_type}") # Log file size info file_size = len(content) logger.info(f"File size: {file_size} bytes ({file_size / 1024 / 1024:.2f} MB)") return AudioUploadDto( filename=filename, content=content, content_type=content_type, size=len(content) ) except Exception as e: logger.error(f"Failed to create AudioUploadDto: {e}") raise ValueError(f"Invalid audio file: {str(e)}") def get_supported_configurations() -> dict: """ Get supported configurations from application service. Returns: dict: Supported configurations """ try: logger.info("Getting global container...") container = get_global_container() logger.info("Resolving AudioProcessingApplicationService...") audio_service = container.resolve(AudioProcessingApplicationService) logger.info("Getting supported configurations from service...") config = audio_service.get_supported_configurations() logger.info(f"Retrieved configurations: {config}") return config except Exception as e: logger.error(f"Failed to get configurations: {e}", exc_info=True) logger.warning("Using fallback configurations - this may indicate a configuration service issue") # Return fallback configurations fallback_config = { 'voices': ['chatterbox'], 'languages': ['en', 'zh'], 'audio_formats': ['wav', 'mp3', 'm4a', 'flac', 'ogg'], # Updated to include all supported formats 'max_file_size_mb': 100, 'speed_range': {'min': 0.5, 'max': 2.0} } logger.info(f"Using fallback configuration: {fallback_config}") return fallback_config def process_audio_pipeline( audio_file, target_language: str, voice: str, speed: float, source_language: str = "en" ) -> Tuple[str, str, str, str, str]: """ Execute the complete processing pipeline using application services. Args: audio_file: Gradio audio file input asr_model: ASR model to use target_language: Target language for translation voice: Voice for TTS speed: Speech speed source_language: Source language Returns: Tuple: (status_message, original_text, translated_text, audio_output_path, processing_details) """ try: if not audio_file: return "❌ No audio file provided", "", "", None, "" logger.info(f"Starting processing for: {audio_file} using whisper-medium model") logger.info(f"Audio file exists: {os.path.exists(audio_file) if audio_file else 'N/A'}") # Create audio upload DTO logger.info("Creating AudioUploadDto...") audio_upload = create_audio_upload_dto(audio_file) logger.info(f"AudioUploadDto created successfully - Content-Type: {audio_upload.content_type}") # Get application service from container container = get_global_container() audio_service = container.resolve(AudioProcessingApplicationService) # Create processing request request = ProcessingRequestDto( audio=audio_upload, asr_model="whisper-medium", # This will use the default from config if None target_language=target_language, voice=voice, speed=speed, source_language=source_language ) # Process through application service result = audio_service.process_audio_pipeline(request) if result.success: status_message = f"✅ Processing Complete! ({result.processing_time:.2f}s)" logger.info(f"Processing completed successfully in {result.processing_time:.2f}s") # Prepare processing details details = { "processing_time": f"{result.processing_time:.2f}s", "asr_model": "whisper-medium", "target_language": target_language, "voice": voice, "speed": speed } if result.metadata: details.update(result.metadata) processing_details = json.dumps(details, indent=2) return ( status_message, result.original_text or "", result.translated_text or "", result.audio_path if result.has_audio_output else None, processing_details ) else: error_msg = f"❌ Processing Failed: {result.error_message}" logger.error(f"Processing failed: {result.error_message}") return error_msg, "", "", None, f"Error: {result.error_message}" except Exception as e: logger.error(f"Processing failed: {str(e)}", exc_info=True) error_msg = f"❌ Processing Failed: {str(e)}" return error_msg, "", "", None, f"System Error: {str(e)}" def create_interface(): """Create and configure the Gradio interface using gr.Interface for better compatibility""" # Initialize application initialize_application() # Get supported configurations config = get_supported_configurations() # Log configuration details for debugging logger.info("=== Gradio Interface Configuration ===") logger.info(f"Supported voices: {config.get('voices', [])}") logger.info(f"Supported audio formats: {config.get('audio_formats', [])}") logger.info(f"Max file size: {config.get('max_file_size_mb', 0)} MB") logger.info(f"Speed range: {config.get('speed_range', {})}") logger.info("=== End Configuration ===") # Language options mapping language_options = { "Chinese (Mandarin)": "zh", "English": "en" } def process_wrapper(audio_file, target_lang_val, voice_val, speed_val): """Wrapper function for processing""" # Map display language to code target_lang_code = language_options.get(target_lang_val, "zh") # Get default ASR model from configuration default_asr_model = config.get('default_asr_model', 'whisper') return process_audio_pipeline( audio_file=audio_file, target_language=target_lang_code, voice=voice_val, speed=speed_val, source_language="en" ) # Create the interface using gr.Interface for better compatibility logger.info("Creating Gradio interface with updated file type support...") logger.info("Updated file types for Audio component: .wav, .mp3, .m4a, .flac, .ogg") interface = gr.Interface( fn=process_wrapper, inputs=[ gr.Audio( label="Upload Audio File", type="filepath", # Accept both file extensions and MIME types # This explicitly allows mp3 files to pass Gradio's frontend validation ), gr.Dropdown( choices=list(language_options.keys()), value="Chinese (Mandarin)", label="Target Language" ), gr.Dropdown( choices=config['voices'], value="chatterbox", label="Voice" ), gr.Slider( minimum=config['speed_range']['min'], maximum=config['speed_range']['max'], value=1.0, step=0.1, label="Speech Speed" ) ], outputs=[ gr.Textbox(label="Status"), gr.Textbox(label="Recognition Results"), gr.Textbox(label="Translation Results"), gr.Audio(label="Audio Output"), gr.Code(label="Processing Details", language="json") ], title="🎧 High-Quality Audio Translation System", description="Upload English Audio → Get Chinese Speech Output", examples=[ # Add example configurations if needed ] ) return interface def main(): """Main application entry point""" logger.info("Starting Gradio application") try: # Create interface interface = create_interface() # Launch the interface interface.launch( server_name="0.0.0.0", server_port=7860, share=False, debug=False, show_error=True, quiet=False ) except Exception as e: logger.error(f"Failed to start application: {str(e)}", exc_info=True) raise if __name__ == "__main__": main()