Spaces:
Sleeping
Sleeping
| """ | |
| Main entry point for the Audio Translation Web Application | |
| Handles file upload, processing pipeline, and UI rendering using DDD architecture with Gradio | |
| """ | |
| import logging | |
# Configure root logging once at import time: records are written both to
# "app.log" and to the console stream.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[logging.FileHandler("app.log"), logging.StreamHandler()],
)

# Module-level logger used by every function in this file
logger = logging.getLogger(__name__)
| import gradio as gr | |
| import os | |
| import json | |
| from typing import Optional, Tuple, Dict, Any | |
| # Import application services and DTOs | |
| from src.application.services.audio_processing_service import AudioProcessingApplicationService | |
| from src.application.services.configuration_service import ConfigurationApplicationService | |
| from src.application.dtos.audio_upload_dto import AudioUploadDto | |
| from src.application.dtos.processing_request_dto import ProcessingRequestDto | |
| from src.application.dtos.processing_result_dto import ProcessingResultDto | |
| # Import infrastructure setup | |
| from src.infrastructure.config.container_setup import initialize_global_container, get_global_container | |
# Make sure the working directories for uploads and generated outputs exist
for _temp_dir in ("temp/uploads", "temp/outputs"):
    os.makedirs(_temp_dir, exist_ok=True)
del _temp_dir  # keep the loop variable out of the module namespace

# Flag recording whether the dependency injection container has been set up
container_initialized = False
def initialize_application():
    """
    Initialize the application's dependency injection container once.

    Calls after a successful initialization are no-ops, guarded by the
    module-level ``container_initialized`` flag.

    Raises:
        RuntimeError: If container initialization fails. The original
            exception is attached as ``__cause__``.
    """
    global container_initialized

    if container_initialized:
        return

    try:
        logger.info("Initializing application container")
        initialize_global_container()
        container_initialized = True
        logger.info("Application container initialized successfully")
    except Exception as e:
        # Log the traceback here as well, matching the other handlers in this file
        logger.error(f"Failed to initialize application: {e}", exc_info=True)
        # Chain explicitly so the root cause stays attached to the RuntimeError
        raise RuntimeError(f"Application initialization failed: {str(e)}") from e
def create_audio_upload_dto(audio_file_path: str) -> AudioUploadDto:
    """
    Create AudioUploadDto from audio file path.

    The whole file is read into memory and its content type is derived from
    the file extension. Unknown extensions are mapped to 'audio/wav'.

    Args:
        audio_file_path: Path to the uploaded audio file

    Returns:
        AudioUploadDto: DTO containing upload information

    Raises:
        ValueError: If the path is empty, does not exist, or the file cannot
            be read or wrapped in a DTO. The underlying exception is
            attached as ``__cause__``.
    """
    try:
        if not audio_file_path or not os.path.exists(audio_file_path):
            raise ValueError("No audio file provided or file does not exist")

        filename = os.path.basename(audio_file_path)
        logger.info(f"Creating AudioUploadDto for file: {filename}")
        logger.info(f"Full file path: {audio_file_path}")

        with open(audio_file_path, 'rb') as f:
            content = f.read()

        # Determine content type based on file extension
        file_ext = os.path.splitext(filename.lower())[1]
        logger.info(f"Detected file extension: {file_ext}")
        content_type_map = {
            '.wav': 'audio/wav',
            '.mp3': 'audio/mpeg',
            '.m4a': 'audio/mp4',
            '.flac': 'audio/flac',
            '.ogg': 'audio/ogg',
        }
        content_type = content_type_map.get(file_ext, 'audio/wav')
        logger.info(f"Mapped content type: {content_type}")

        # Log file size info; the same value is reused for the DTO below
        file_size = len(content)
        logger.info(f"File size: {file_size} bytes ({file_size / 1024 / 1024:.2f} MB)")

        return AudioUploadDto(
            filename=filename,
            content=content,
            content_type=content_type,
            size=file_size,
        )
    except Exception as e:
        # Every failure (including the validation error raised above) is
        # reported to callers as a ValueError with a uniform message prefix.
        logger.error(f"Failed to create AudioUploadDto: {e}", exc_info=True)
        raise ValueError(f"Invalid audio file: {str(e)}") from e
def get_supported_configurations() -> dict:
    """
    Fetch the supported configurations from the application service.

    If resolving or querying the service raises, the error is logged with
    its traceback and a built-in fallback configuration is returned instead.

    Returns:
        dict: Supported configurations
    """
    try:
        logger.info("Getting global container...")
        di_container = get_global_container()

        logger.info("Resolving AudioProcessingApplicationService...")
        service = di_container.resolve(AudioProcessingApplicationService)

        logger.info("Getting supported configurations from service...")
        supported = service.get_supported_configurations()
        logger.info(f"Retrieved configurations: {supported}")
    except Exception as e:
        logger.error(f"Failed to get configurations: {e}", exc_info=True)
        logger.warning("Using fallback configurations - this may indicate a configuration service issue")

        # Built-in defaults used when the service is unavailable
        defaults = dict(
            voices=['chatterbox'],
            languages=['en', 'zh'],
            audio_formats=['wav', 'mp3', 'm4a', 'flac', 'ogg'],
            max_file_size_mb=100,
            speed_range={'min': 0.5, 'max': 2.0},
        )
        logger.info(f"Using fallback configuration: {defaults}")
        return defaults
    else:
        return supported
def process_audio_pipeline(
    audio_file,
    target_language: str,
    voice: str,
    speed: float,
    source_language: str = "en",
    asr_model: Optional[str] = None,
) -> Tuple[str, str, str, Optional[str], str]:
    """
    Execute the complete processing pipeline using application services.

    Args:
        audio_file: Path of the uploaded audio file (the Gradio Audio input
            in this file is configured with type="filepath")
        target_language: Target language for translation
        voice: Voice for TTS
        speed: Speech speed
        source_language: Source language
        asr_model: ASR model to use. None is forwarded unchanged to
            ProcessingRequestDto, which is expected to fall back to the
            configured default (TODO confirm against the DTO).

    Returns:
        Tuple: (status_message, original_text, translated_text,
        audio_output_path, processing_details). audio_output_path is None
        when no audio was produced or when processing failed. Failures are
        reported through the status message rather than raised.
    """
    # NOTE(review): the leading "β" in the status strings below looks like a
    # mis-decoded emoji - confirm the intended characters before changing them.
    try:
        if not audio_file:
            return "β No audio file provided", "", "", None, ""

        logger.info(f"Starting processing for: {audio_file} using {asr_model} model")
        logger.info(f"Audio file exists: {os.path.exists(audio_file)}")

        # Create audio upload DTO
        logger.info("Creating AudioUploadDto...")
        audio_upload = create_audio_upload_dto(audio_file)
        logger.info(f"AudioUploadDto created successfully - Content-Type: {audio_upload.content_type}")

        # Get application service from container
        container = get_global_container()
        audio_service = container.resolve(AudioProcessingApplicationService)

        # Create processing request
        request = ProcessingRequestDto(
            audio=audio_upload,
            asr_model=asr_model,
            target_language=target_language,
            voice=voice,
            speed=speed,
            source_language=source_language,
        )

        # Process through application service
        result = audio_service.process_audio_pipeline(request)

        if not result.success:
            error_msg = f"β Processing Failed: {result.error_message}"
            logger.error(f"Processing failed: {result.error_message}")
            return error_msg, "", "", None, f"Error: {result.error_message}"

        status_message = f"β Processing Complete! ({result.processing_time:.2f}s)"
        logger.info(f"Processing completed successfully in {result.processing_time:.2f}s")

        # Prepare processing details
        details = {
            "processing_time": f"{result.processing_time:.2f}s",
            "asr_model": asr_model,
            "target_language": target_language,
            "voice": voice,
            "speed": speed,
        }
        if result.metadata:
            details.update(result.metadata)

        # The details are display-only; stringify unknown metadata values so a
        # non-JSON-serializable entry cannot turn a successful run into a failure.
        processing_details = json.dumps(details, indent=2, default=str)

        return (
            status_message,
            result.original_text or "",
            result.translated_text or "",
            result.audio_path if result.has_audio_output else None,
            processing_details,
        )
    except Exception as e:
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        error_msg = f"β Processing Failed: {str(e)}"
        return error_msg, "", "", None, f"System Error: {str(e)}"


def create_interface():
    """
    Create and configure the Gradio interface using gr.Interface for better compatibility.

    Initializes the application container, reads the supported
    configurations, and builds an interface whose callback forwards the
    inputs to process_audio_pipeline.

    Returns:
        The configured gr.Interface instance (not yet launched).
    """
    # Initialize application
    initialize_application()

    # Get supported configurations
    config = get_supported_configurations()

    # Log configuration details for debugging
    logger.info("=== Gradio Interface Configuration ===")
    logger.info(f"Supported voices: {config.get('voices', [])}")
    logger.info(f"Supported audio formats: {config.get('audio_formats', [])}")
    logger.info(f"Max file size: {config.get('max_file_size_mb', 0)} MB")
    logger.info(f"Speed range: {config.get('speed_range', {})}")
    logger.info("=== End Configuration ===")

    # Language options mapping (display name -> language code)
    language_options = {
        "Chinese (Mandarin)": "zh",
        "English": "en"
    }

    # Tolerate missing or empty keys in the service configuration. The
    # defaults mirror the fallback in get_supported_configurations.
    voices = list(config.get('voices') or ['chatterbox'])
    # The dropdown's initial value must be one of its choices
    default_voice = "chatterbox" if "chatterbox" in voices else voices[0]

    speed_range = config.get('speed_range') or {}
    min_speed = speed_range.get('min', 0.5)
    max_speed = speed_range.get('max', 2.0)
    # Keep the slider's initial value inside the configured range
    default_speed = min(max(1.0, min_speed), max_speed)

    # Get default ASR model from configuration
    default_asr_model = config.get('default_asr_model', 'whisper')

    def process_wrapper(audio_file, target_lang_val, voice_val, speed_val):
        """Map the UI values to pipeline arguments and run the pipeline."""
        # Map display language to code
        target_lang_code = language_options.get(target_lang_val, "zh")

        return process_audio_pipeline(
            audio_file=audio_file,
            target_language=target_lang_code,
            voice=voice_val,
            speed=speed_val,
            source_language="en",
            asr_model=default_asr_model,
        )

    # Create the interface using gr.Interface for better compatibility
    logger.info("Creating Gradio interface with updated file type support...")
    logger.info("Updated file types for Audio component: .wav, .mp3, .m4a, .flac, .ogg")

    interface = gr.Interface(
        fn=process_wrapper,
        inputs=[
            gr.Audio(
                label="Upload Audio File",
                type="filepath",
            ),
            gr.Dropdown(
                choices=list(language_options.keys()),
                value="Chinese (Mandarin)",
                label="Target Language"
            ),
            gr.Dropdown(
                choices=voices,
                value=default_voice,
                label="Voice"
            ),
            gr.Slider(
                minimum=min_speed,
                maximum=max_speed,
                value=default_speed,
                step=0.1,
                label="Speech Speed"
            )
        ],
        outputs=[
            gr.Textbox(label="Status"),
            gr.Textbox(label="Recognition Results"),
            gr.Textbox(label="Translation Results"),
            gr.Audio(label="Audio Output"),
            gr.Code(label="Processing Details", language="json")
        ],
        title="π§ High-Quality Audio Translation System",
        description="Upload English Audio β Get Chinese Speech Output",
        examples=[
            # Add example configurations if needed
        ]
    )

    return interface
def main():
    """
    Main application entry point.

    Builds the Gradio interface and launches it on 0.0.0.0:7860. Any failure
    is logged with its traceback and then re-raised.
    """
    logger.info("Starting Gradio application")

    # Server settings passed to the Gradio launcher
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=False,
        show_error=True,
        quiet=False,
    )

    try:
        # Create the interface, then launch it
        app = create_interface()
        app.launch(**launch_options)
    except Exception as e:
        logger.error(f"Failed to start application: {str(e)}", exc_info=True)
        raise


if __name__ == "__main__":
    main()