Spaces:

DroolingPanda
/

teachingAssistant

Sleeping

App Files Community

Michael Hu committed on Jul 29

Commit

8b93773

1 Parent(s): 94a90b6

try fixing gr.blocks issue

Browse files

Files changed (1) hide show

app.py +54 -127

app.py CHANGED Viewed

@@ -200,7 +200,7 @@ def process_audio_pipeline(
         return error_msg, "", "", None, f"System Error: {str(e)}"
 def create_interface():
-    """Create and configure the Gradio interface"""
     # Initialize application
     initialize_application()
@@ -208,9 +208,6 @@ def create_interface():
     # Get supported configurations
     config = get_supported_configurations()
-    # Voice options mapping
-    voice_options = ["kokoro", "dia", "cosyvoice2", "dummy"]
     # Language options mapping
     language_options = {
         "Chinese (Mandarin)": "zh",
@@ -220,132 +217,62 @@ def create_interface():
         "English": "en"
     }
-    # Create the interface
-    with gr.Blocks(
-        title="🎧 High-Quality Audio Translation System",
-        theme=gr.themes.Soft(),
-        css="""
-        .gradio-container {
-            max-width: 1200px !important;
-        }
-        .audio-player {
-            width: 100%;
-        }
-        """
-    ) as interface:
-        gr.Markdown("# 🎧 High-Quality Audio Translation System")
-        gr.Markdown("Upload English Audio → Get Chinese Speech Output")
-        with gr.Row():
-            with gr.Column(scale=2):
-                # Audio input
-                audio_input = gr.Audio(
-                    label=f"Upload Audio File ({', '.join(config['audio_formats']).upper()})",
-                    type="filepath",
-                    format="wav"
-                )
-                # Model selection
-                asr_model = gr.Dropdown(
-                    choices=config['asr_models'],
-                    value=config['asr_models'][0] if config['asr_models'] else "parakeet",
-                    label="Speech Recognition Model",
-                    info="Choose the ASR model for speech recognition"
-                )
-                # Language selection
-                target_language = gr.Dropdown(
-                    choices=list(language_options.keys()),
-                    value="Chinese (Mandarin)",
-                    label="Target Language",
-                    info="Select the target language for translation"
-                )
-            with gr.Column(scale=1):
-                # TTS Settings
-                gr.Markdown("### TTS Settings")
-                voice = gr.Dropdown(
-                    choices=voice_options,
-                    value="kokoro",
-                    label="Voice"
-                )
-                speed = gr.Slider(
-                    minimum=config['speed_range']['min'],
-                    maximum=config['speed_range']['max'],
-                    value=1.0,
-                    step=0.1,
-                    label="Speech Speed"
-                )
-        # Process button
-        process_btn = gr.Button("🚀 Process Audio", variant="primary", size="lg")
-        # Status message
-        status_output = gr.Markdown(label="Status")
-        # Results section
-        with gr.Row():
-            with gr.Column(scale=2):
-                # Text outputs
-                original_text = gr.Textbox(
-                    label="Recognition Results",
-                    lines=4,
-                    max_lines=8,
-                    interactive=False
-                )
-                translated_text = gr.Textbox(
-                    label="Translation Results",
-                    lines=4,
-                    max_lines=8,
-                    interactive=False
-                )
-                # Processing details
-                with gr.Accordion("Processing Details", open=False):
-                    processing_details = gr.Code(
-                        label="Metadata",
-                        language="json",
-                        interactive=False
-                    )
-            with gr.Column(scale=1):
-                # Audio output
-                audio_output = gr.Audio(
-                    label="Audio Output",
-                    interactive=False
-                )
-        # Wire up the processing function
-        def process_wrapper(audio_file, asr_model_val, target_lang_val, voice_val, speed_val):
-            # Map display language to code
-            target_lang_code = language_options.get(target_lang_val, "zh")
-            return process_audio_pipeline(
-                audio_file=audio_file,
-                asr_model=asr_model_val,
-                target_language=target_lang_code,
-                voice=voice_val,
-                speed=speed_val,
-                source_language="en"
-            )
-        process_btn.click(
-            fn=process_wrapper,
-            inputs=[audio_input, asr_model, target_language, voice, speed],
-            outputs=[status_output, original_text, translated_text, audio_output, processing_details]
-        )
-        # Add examples if needed
-        gr.Examples(
-            examples=[],
-            inputs=[audio_input, asr_model, target_language, voice, speed],
-            label="Example Configurations"
         )
     return interface
 def main():

         return error_msg, "", "", None, f"System Error: {str(e)}"
 def create_interface():
+    """Create and configure the Gradio interface using gr.Interface for better compatibility"""
     # Initialize application
     initialize_application()
     # Get supported configurations
     config = get_supported_configurations()
     # Language options mapping
     language_options = {
         "Chinese (Mandarin)": "zh",
         "English": "en"
     }
+    def process_wrapper(audio_file, asr_model_val, target_lang_val, voice_val, speed_val):
+        """Wrapper function for processing"""
+        # Map display language to code
+        target_lang_code = language_options.get(target_lang_val, "zh")
+        return process_audio_pipeline(
+            audio_file=audio_file,
+            asr_model=asr_model_val,
+            target_language=target_lang_code,
+            voice=voice_val,
+            speed=speed_val,
+            source_language="en"
         )
+    # Create the interface using gr.Interface for better compatibility
+    interface = gr.Interface(
+        fn=process_wrapper,
+        inputs=[
+            gr.Audio(label="Upload Audio File", type="filepath"),
+            gr.Dropdown(
+                choices=config['asr_models'],
+                value=config['asr_models'][0] if config['asr_models'] else "parakeet",
+                label="Speech Recognition Model"
+            ),
+            gr.Dropdown(
+                choices=list(language_options.keys()),
+                value="Chinese (Mandarin)",
+                label="Target Language"
+            ),
+            gr.Dropdown(
+                choices=config['voices'],
+                value="kokoro",
+                label="Voice"
+            ),
+            gr.Slider(
+                minimum=config['speed_range']['min'],
+                maximum=config['speed_range']['max'],
+                value=1.0,
+                step=0.1,
+                label="Speech Speed"
+            )
+        ],
+        outputs=[
+            gr.Textbox(label="Status"),
+            gr.Textbox(label="Recognition Results"),
+            gr.Textbox(label="Translation Results"),
+            gr.Audio(label="Audio Output"),
+            gr.Code(label="Processing Details", language="json")
+        ],
+        title="🎧 High-Quality Audio Translation System",
+        description="Upload English Audio → Get Chinese Speech Output",
+        examples=[
+            # Add example configurations if needed
+        ]
+    )
     return interface
 def main():