Spaces:
Sleeping
Sleeping
Michael Hu
committed on
Commit
·
8b93773
1
Parent(s):
94a90b6
try fixing gr.blocks issue
Browse files
app.py
CHANGED
|
@@ -200,7 +200,7 @@ def process_audio_pipeline(
|
|
| 200 |
return error_msg, "", "", None, f"System Error: {str(e)}"
|
| 201 |
|
| 202 |
def create_interface():
|
| 203 |
-
"""Create and configure the Gradio interface"""
|
| 204 |
|
| 205 |
# Initialize application
|
| 206 |
initialize_application()
|
|
@@ -208,9 +208,6 @@ def create_interface():
|
|
| 208 |
# Get supported configurations
|
| 209 |
config = get_supported_configurations()
|
| 210 |
|
| 211 |
-
# Voice options mapping
|
| 212 |
-
voice_options = ["kokoro", "dia", "cosyvoice2", "dummy"]
|
| 213 |
-
|
| 214 |
# Language options mapping
|
| 215 |
language_options = {
|
| 216 |
"Chinese (Mandarin)": "zh",
|
|
@@ -220,132 +217,62 @@ def create_interface():
|
|
| 220 |
"English": "en"
|
| 221 |
}
|
| 222 |
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
css="""
|
| 228 |
-
.gradio-container {
|
| 229 |
-
max-width: 1200px !important;
|
| 230 |
-
}
|
| 231 |
-
.audio-player {
|
| 232 |
-
width: 100%;
|
| 233 |
-
}
|
| 234 |
-
"""
|
| 235 |
-
) as interface:
|
| 236 |
-
|
| 237 |
-
gr.Markdown("# 🎧 High-Quality Audio Translation System")
|
| 238 |
-
gr.Markdown("Upload English Audio → Get Chinese Speech Output")
|
| 239 |
-
|
| 240 |
-
with gr.Row():
|
| 241 |
-
with gr.Column(scale=2):
|
| 242 |
-
# Audio input
|
| 243 |
-
audio_input = gr.Audio(
|
| 244 |
-
label=f"Upload Audio File ({', '.join(config['audio_formats']).upper()})",
|
| 245 |
-
type="filepath",
|
| 246 |
-
format="wav"
|
| 247 |
-
)
|
| 248 |
-
|
| 249 |
-
# Model selection
|
| 250 |
-
asr_model = gr.Dropdown(
|
| 251 |
-
choices=config['asr_models'],
|
| 252 |
-
value=config['asr_models'][0] if config['asr_models'] else "parakeet",
|
| 253 |
-
label="Speech Recognition Model",
|
| 254 |
-
info="Choose the ASR model for speech recognition"
|
| 255 |
-
)
|
| 256 |
-
|
| 257 |
-
# Language selection
|
| 258 |
-
target_language = gr.Dropdown(
|
| 259 |
-
choices=list(language_options.keys()),
|
| 260 |
-
value="Chinese (Mandarin)",
|
| 261 |
-
label="Target Language",
|
| 262 |
-
info="Select the target language for translation"
|
| 263 |
-
)
|
| 264 |
-
|
| 265 |
-
with gr.Column(scale=1):
|
| 266 |
-
# TTS Settings
|
| 267 |
-
gr.Markdown("### TTS Settings")
|
| 268 |
-
|
| 269 |
-
voice = gr.Dropdown(
|
| 270 |
-
choices=voice_options,
|
| 271 |
-
value="kokoro",
|
| 272 |
-
label="Voice"
|
| 273 |
-
)
|
| 274 |
-
|
| 275 |
-
speed = gr.Slider(
|
| 276 |
-
minimum=config['speed_range']['min'],
|
| 277 |
-
maximum=config['speed_range']['max'],
|
| 278 |
-
value=1.0,
|
| 279 |
-
step=0.1,
|
| 280 |
-
label="Speech Speed"
|
| 281 |
-
)
|
| 282 |
-
|
| 283 |
-
# Process button
|
| 284 |
-
process_btn = gr.Button("π Process Audio", variant="primary", size="lg")
|
| 285 |
-
|
| 286 |
-
# Status message
|
| 287 |
-
status_output = gr.Markdown(label="Status")
|
| 288 |
-
|
| 289 |
-
# Results section
|
| 290 |
-
with gr.Row():
|
| 291 |
-
with gr.Column(scale=2):
|
| 292 |
-
# Text outputs
|
| 293 |
-
original_text = gr.Textbox(
|
| 294 |
-
label="Recognition Results",
|
| 295 |
-
lines=4,
|
| 296 |
-
max_lines=8,
|
| 297 |
-
interactive=False
|
| 298 |
-
)
|
| 299 |
-
|
| 300 |
-
translated_text = gr.Textbox(
|
| 301 |
-
label="Translation Results",
|
| 302 |
-
lines=4,
|
| 303 |
-
max_lines=8,
|
| 304 |
-
interactive=False
|
| 305 |
-
)
|
| 306 |
-
|
| 307 |
-
# Processing details
|
| 308 |
-
with gr.Accordion("Processing Details", open=False):
|
| 309 |
-
processing_details = gr.Code(
|
| 310 |
-
label="Metadata",
|
| 311 |
-
language="json",
|
| 312 |
-
interactive=False
|
| 313 |
-
)
|
| 314 |
-
|
| 315 |
-
with gr.Column(scale=1):
|
| 316 |
-
# Audio output
|
| 317 |
-
audio_output = gr.Audio(
|
| 318 |
-
label="Audio Output",
|
| 319 |
-
interactive=False
|
| 320 |
-
)
|
| 321 |
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
target_lang_code
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
asr_model=asr_model_val,
|
| 330 |
-
target_language=target_lang_code,
|
| 331 |
-
voice=voice_val,
|
| 332 |
-
speed=speed_val,
|
| 333 |
-
source_language="en"
|
| 334 |
-
)
|
| 335 |
-
|
| 336 |
-
process_btn.click(
|
| 337 |
-
fn=process_wrapper,
|
| 338 |
-
inputs=[audio_input, asr_model, target_language, voice, speed],
|
| 339 |
-
outputs=[status_output, original_text, translated_text, audio_output, processing_details]
|
| 340 |
-
)
|
| 341 |
-
|
| 342 |
-
# Add examples if needed
|
| 343 |
-
gr.Examples(
|
| 344 |
-
examples=[],
|
| 345 |
-
inputs=[audio_input, asr_model, target_language, voice, speed],
|
| 346 |
-
label="Example Configurations"
|
| 347 |
)
|
| 348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
return interface
|
| 350 |
|
| 351 |
def main():
|
|
|
|
| 200 |
return error_msg, "", "", None, f"System Error: {str(e)}"
|
| 201 |
|
| 202 |
def create_interface():
|
| 203 |
+
"""Create and configure the Gradio interface using gr.Interface for better compatibility"""
|
| 204 |
|
| 205 |
# Initialize application
|
| 206 |
initialize_application()
|
|
|
|
| 208 |
# Get supported configurations
|
| 209 |
config = get_supported_configurations()
|
| 210 |
|
|
|
|
|
|
|
|
|
|
| 211 |
# Language options mapping
|
| 212 |
language_options = {
|
| 213 |
"Chinese (Mandarin)": "zh",
|
|
|
|
| 217 |
"English": "en"
|
| 218 |
}
|
| 219 |
|
| 220 |
+
def process_wrapper(audio_file, asr_model_val, target_lang_val, voice_val, speed_val):
|
| 221 |
+
"""Wrapper function for processing"""
|
| 222 |
+
# Map display language to code
|
| 223 |
+
target_lang_code = language_options.get(target_lang_val, "zh")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
+
return process_audio_pipeline(
|
| 226 |
+
audio_file=audio_file,
|
| 227 |
+
asr_model=asr_model_val,
|
| 228 |
+
target_language=target_lang_code,
|
| 229 |
+
voice=voice_val,
|
| 230 |
+
speed=speed_val,
|
| 231 |
+
source_language="en"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
)
|
| 233 |
|
| 234 |
+
# Create the interface using gr.Interface for better compatibility
|
| 235 |
+
interface = gr.Interface(
|
| 236 |
+
fn=process_wrapper,
|
| 237 |
+
inputs=[
|
| 238 |
+
gr.Audio(label="Upload Audio File", type="filepath"),
|
| 239 |
+
gr.Dropdown(
|
| 240 |
+
choices=config['asr_models'],
|
| 241 |
+
value=config['asr_models'][0] if config['asr_models'] else "parakeet",
|
| 242 |
+
label="Speech Recognition Model"
|
| 243 |
+
),
|
| 244 |
+
gr.Dropdown(
|
| 245 |
+
choices=list(language_options.keys()),
|
| 246 |
+
value="Chinese (Mandarin)",
|
| 247 |
+
label="Target Language"
|
| 248 |
+
),
|
| 249 |
+
gr.Dropdown(
|
| 250 |
+
choices=config['voices'],
|
| 251 |
+
value="kokoro",
|
| 252 |
+
label="Voice"
|
| 253 |
+
),
|
| 254 |
+
gr.Slider(
|
| 255 |
+
minimum=config['speed_range']['min'],
|
| 256 |
+
maximum=config['speed_range']['max'],
|
| 257 |
+
value=1.0,
|
| 258 |
+
step=0.1,
|
| 259 |
+
label="Speech Speed"
|
| 260 |
+
)
|
| 261 |
+
],
|
| 262 |
+
outputs=[
|
| 263 |
+
gr.Textbox(label="Status"),
|
| 264 |
+
gr.Textbox(label="Recognition Results"),
|
| 265 |
+
gr.Textbox(label="Translation Results"),
|
| 266 |
+
gr.Audio(label="Audio Output"),
|
| 267 |
+
gr.Code(label="Processing Details", language="json")
|
| 268 |
+
],
|
| 269 |
+
title="🎧 High-Quality Audio Translation System",
|
| 270 |
+
description="Upload English Audio → Get Chinese Speech Output",
|
| 271 |
+
examples=[
|
| 272 |
+
# Add example configurations if needed
|
| 273 |
+
]
|
| 274 |
+
)
|
| 275 |
+
|
| 276 |
return interface
|
| 277 |
|
| 278 |
def main():
|