Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
0207836
1
Parent(s):
0a19e95
Fix Missing UI comps
Browse files- app.py +217 -1
- backend_modal/modal_runner.py +54 -1
app.py
CHANGED
|
@@ -19,6 +19,75 @@ AVAILABLE_VOICES = [
|
|
| 19 |
]
|
| 20 |
DEFAULT_SPEAKERS = ['en-Alice_woman', 'en-Carter_man', 'en-Frank_man', 'en-Maya_woman']
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# --- Modal Connection ---
|
| 23 |
try:
|
| 24 |
# Look up the remote class
|
|
@@ -54,7 +123,7 @@ def create_demo_interface():
|
|
| 54 |
alt="VibeVoice Banner">
|
| 55 |
</div>
|
| 56 |
""")
|
| 57 |
-
gr.Markdown("##
|
| 58 |
|
| 59 |
with gr.Tabs():
|
| 60 |
with gr.Tab("Generate"):
|
|
@@ -104,6 +173,45 @@ def create_demo_interface():
|
|
| 104 |
lines=12,
|
| 105 |
max_lines=20,
|
| 106 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
generate_btn = gr.Button(
|
| 108 |
"🚀 Generate Conference (on Modal)", size="lg",
|
| 109 |
variant="primary",
|
|
@@ -116,6 +224,55 @@ def create_demo_interface():
|
|
| 116 |
|
| 117 |
def update_speaker_visibility(num_speakers):
|
| 118 |
return [gr.update(visible=(i < num_speakers)) for i in range(4)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
num_speakers.change(
|
| 121 |
fn=update_speaker_visibility,
|
|
@@ -156,6 +313,65 @@ def create_demo_interface():
|
|
| 156 |
inputs=[model_dropdown, num_speakers, script_input] + speaker_selections + [cfg_scale],
|
| 157 |
outputs=[complete_audio_output, log_output]
|
| 158 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
return interface
|
| 160 |
|
| 161 |
# --- Main Execution ---
|
|
|
|
| 19 |
]
|
| 20 |
DEFAULT_SPEAKERS = ['en-Alice_woman', 'en-Carter_man', 'en-Frank_man', 'en-Maya_woman']
|
| 21 |
|
| 22 |
+
# Male and female voice categories for smart speaker selection.
# Grouped by the _man / _woman suffix in the voice preset names;
# presumably subsets of AVAILABLE_VOICES — TODO confirm against that list.
# Consumed by smart_speaker_selection() to match script speaker genders.
MALE_VOICES = [
    "en-Carter_man",
    "en-Frank_man",
    "en-Yasser_man",
    "in-Samuel_man",
    "zh-Anchen_man_bgm",
    "zh-Bowen_man"
]
FEMALE_VOICES = [
    "en-Alice_woman_bgm",
    "en-Alice_woman",
    "en-Maya_woman",
    "zh-Xinran_woman"
]
|
| 37 |
+
|
| 38 |
+
# Load example scripts
|
| 39 |
+
def load_example_scripts():
    """Read the bundled example scripts from the ``text_examples`` directory.

    Returns:
        A pair of parallel lists: the original scripts and their
        "natural talking sounds" variants. A missing original becomes an
        empty string; a missing natural variant falls back to the original
        text just loaded, so both lists stay index-aligned.
    """
    examples_dir = "text_examples"
    originals = []
    naturals = []

    # Nothing to load when the directory is absent (e.g. remote deploys).
    if not os.path.exists(examples_dir):
        return originals, naturals

    original_files = [
        "1p_ai_tedtalk.txt",
        "1p_politcal_speech.txt",
        "2p_financeipo_meeting.txt",
        "2p_telehealth_meeting.txt",
        "3p_military_meeting.txt",
        "3p_oil_meeting.txt",
        "4p_gamecreation_meeting.txt",
        "4p_product_meeting.txt"
    ]

    def _read(path):
        # All example files are UTF-8 text.
        with open(path, 'r', encoding='utf-8') as fh:
            return fh.read()

    for name in original_files:
        original_path = os.path.join(examples_dir, name)
        natural_path = os.path.join(examples_dir, name.replace(".txt", "_natural.txt"))

        originals.append(_read(original_path) if os.path.exists(original_path) else "")

        if os.path.exists(natural_path):
            naturals.append(_read(natural_path))
        else:
            # No natural variant on disk: reuse the original just appended.
            naturals.append(originals[-1] if originals else "")

    return originals, naturals
|
| 76 |
+
|
| 77 |
+
# Gender mapping for each script's speakers.
# Index-aligned with the file list inside load_example_scripts(); each entry
# lists one gender label per speaker, in speaking order. Used by
# smart_speaker_selection() to pick matching voice presets.
SCRIPT_SPEAKER_GENDERS = [
    ["female"], # AI TED Talk - Rachel
    ["neutral"], # Political Speech - generic speaker
    ["male", "female"], # Finance IPO - James, Patricia
    ["female", "male"], # Telehealth - Jennifer, Tom
    ["female", "male", "female"], # Military - Sarah, David, Lisa
    ["male", "female", "male"], # Oil - Robert, Lisa, Michael
    # NOTE(review): 4th label is "male" but the name comment says Emma —
    # confirm against the script; likely should be "female".
    ["male", "female", "male", "male"], # Game Creation - Alex, Sarah, Marcus, Emma
    ["female", "male", "female", "male"] # Product Meeting - Sarah, Marcus, Jennifer, David
]

# Loaded once at import time; two parallel lists (original, natural variant).
EXAMPLE_SCRIPTS, EXAMPLE_SCRIPTS_NATURAL = load_example_scripts()
|
| 90 |
+
|
| 91 |
# --- Modal Connection ---
|
| 92 |
try:
|
| 93 |
# Look up the remote class
|
|
|
|
| 123 |
alt="VibeVoice Banner">
|
| 124 |
</div>
|
| 125 |
""")
|
| 126 |
+
gr.Markdown("## NOTE: The Large model takes significant generation time with limited increase in quality. I recommend trying 1.5B first.")
|
| 127 |
|
| 128 |
with gr.Tabs():
|
| 129 |
with gr.Tab("Generate"):
|
|
|
|
| 173 |
lines=12,
|
| 174 |
max_lines=20,
|
| 175 |
)
|
| 176 |
+
|
| 177 |
+
with gr.Row():
|
| 178 |
+
with gr.Column(scale=1):
|
| 179 |
+
gr.Markdown("### Example Scripts")
|
| 180 |
+
with gr.Row():
|
| 181 |
+
use_natural = gr.Checkbox(
|
| 182 |
+
value=True,
|
| 183 |
+
label="Natural talking sounds",
|
| 184 |
+
scale=1
|
| 185 |
+
)
|
| 186 |
+
duration_display = gr.Textbox(
|
| 187 |
+
value="",
|
| 188 |
+
label="Est. Duration",
|
| 189 |
+
interactive=False,
|
| 190 |
+
scale=1
|
| 191 |
+
)
|
| 192 |
+
|
| 193 |
+
example_names = [
|
| 194 |
+
"AI TED Talk",
|
| 195 |
+
"Political Speech",
|
| 196 |
+
"Finance IPO Meeting",
|
| 197 |
+
"Telehealth Meeting",
|
| 198 |
+
"Military Meeting",
|
| 199 |
+
"Oil Meeting",
|
| 200 |
+
"Game Creation Meeting",
|
| 201 |
+
"Product Meeting"
|
| 202 |
+
]
|
| 203 |
+
|
| 204 |
+
example_buttons = []
|
| 205 |
+
with gr.Row():
|
| 206 |
+
for i in range(min(4, len(example_names))):
|
| 207 |
+
btn = gr.Button(example_names[i], size="sm", variant="secondary")
|
| 208 |
+
example_buttons.append(btn)
|
| 209 |
+
|
| 210 |
+
with gr.Row():
|
| 211 |
+
for i in range(4, min(8, len(example_names))):
|
| 212 |
+
btn = gr.Button(example_names[i], size="sm", variant="secondary")
|
| 213 |
+
example_buttons.append(btn)
|
| 214 |
+
|
| 215 |
generate_btn = gr.Button(
|
| 216 |
"🚀 Generate Conference (on Modal)", size="lg",
|
| 217 |
variant="primary",
|
|
|
|
| 224 |
|
| 225 |
def update_speaker_visibility(num_speakers):
    # Show only the first `num_speakers` of the 4 speaker selection widgets.
    return [gr.update(visible=(i < num_speakers)) for i in range(4)]
|
| 227 |
+
|
| 228 |
+
def smart_speaker_selection(gender_list):
    """Pick one voice per requested gender, avoiding repeats when possible.

    For each label in *gender_list* ("male", "female", or anything else
    treated as neutral), choose the first voice from the matching pool
    that has not been selected yet; if the whole pool is already used,
    fall back to that pool's first entry.
    """
    selected = []
    for gender in gender_list:
        if gender == "male" and MALE_VOICES:
            pool = MALE_VOICES
        elif gender == "female" and FEMALE_VOICES:
            pool = FEMALE_VOICES
        else:
            # Neutral label, or the gendered pool is empty: use any voice.
            pool = AVAILABLE_VOICES
        unused = [voice for voice in pool if voice not in selected]
        selected.append(unused[0] if unused else pool[0])
    return selected
|
| 252 |
+
|
| 253 |
+
def load_specific_example(idx, natural):
    """Load example script *idx* and propose matching speaker voices.

    Returns ``[num_speakers, script, voice1, voice2, voice3, voice4]``,
    padding unused voice slots with ``None``. *natural* selects the
    natural-sounding variant of the script.
    """
    if idx >= len(EXAMPLE_SCRIPTS):
        # Unknown example index: sensible defaults, no voices pre-selected.
        return [2, ""] + [None, None, None, None]

    source = EXAMPLE_SCRIPTS_NATURAL if natural else EXAMPLE_SCRIPTS
    script = source[idx]
    if idx < len(SCRIPT_SPEAKER_GENDERS):
        genders = SCRIPT_SPEAKER_GENDERS[idx]
    else:
        genders = ["neutral"]
    voices = smart_speaker_selection(genders)

    # Always hand back exactly four speaker slots.
    padded_voices = (voices + [None, None, None, None])[:4]
    return [len(genders), script] + padded_voices
|
| 267 |
+
|
| 268 |
+
# Connect example buttons: each button loads its script and voice presets.
for idx, btn in enumerate(example_buttons):
    btn.click(
        # idx is bound as a default argument so each lambda keeps its own
        # index (avoids the classic late-binding-closure pitfall in loops).
        fn=lambda nat, i=idx: load_specific_example(i, nat),
        inputs=[use_natural],
        outputs=[num_speakers, script_input] + speaker_selections,
        queue=False
    )
|
| 276 |
|
| 277 |
num_speakers.change(
|
| 278 |
fn=update_speaker_visibility,
|
|
|
|
| 313 |
inputs=[model_dropdown, num_speakers, script_input] + speaker_selections + [cfg_scale],
|
| 314 |
outputs=[complete_audio_output, log_output]
|
| 315 |
)
|
| 316 |
+
|
| 317 |
+
with gr.Tab("Architecture"):
|
| 318 |
+
with gr.Row():
|
| 319 |
+
gr.Markdown("""VibeVoice is a novel framework designed for generating expressive, long-form, multi-speaker conversational audio,
|
| 320 |
+
such as conferences, from text. It addresses significant challenges in traditional Text-to-Speech (TTS) systems, particularly
|
| 321 |
+
in scalability, speaker consistency, and natural turn-taking. A core innovation of VibeVoice is its use of continuous
|
| 322 |
+
speech tokenizers (Acoustic and Semantic) operating at an ultra-low frame rate of 7.5 Hz. These tokenizers efficiently
|
| 323 |
+
preserve audio fidelity while significantly boosting computational efficiency for processing long sequences. VibeVoice
|
| 324 |
+
employs a next-token diffusion framework, leveraging a Large Language Model (LLM) to understand textual context and
|
| 325 |
+
dialogue flow, and a diffusion head to generate high-fidelity acoustic details. The model can synthesize speech up to
|
| 326 |
+
90 minutes long with up to 4 distinct speakers, surpassing the typical 1-2 speaker limits of many prior models.""")
|
| 327 |
+
with gr.Row():
|
| 328 |
+
with gr.Column():
|
| 329 |
+
gr.Markdown("## VibeVoice: A Frontier Open-Source Text-to-Speech Model")
|
| 330 |
+
|
| 331 |
+
gr.Markdown("""
|
| 332 |
+
### Overview
|
| 333 |
+
|
| 334 |
+
VibeVoice is a novel framework designed for generating expressive, long-form, multi-speaker conversational audio,
|
| 335 |
+
such as conferences, from text. It addresses significant challenges in traditional Text-to-Speech (TTS) systems,
|
| 336 |
+
particularly in scalability, speaker consistency, and natural turn-taking.
|
| 337 |
+
|
| 338 |
+
### Key Features
|
| 339 |
+
|
| 340 |
+
- **Multi-Speaker Support**: Handles up to 4 distinct speakers
|
| 341 |
+
- **Long-Form Generation**: Synthesizes speech up to 90 minutes
|
| 342 |
+
- **Natural Conversation Flow**: Includes turn-taking and interruptions
|
| 343 |
+
- **Ultra-Low Frame Rate**: 7.5 Hz tokenizers for efficiency
|
| 344 |
+
- **High Fidelity**: Preserves acoustic details while being computationally efficient
|
| 345 |
+
|
| 346 |
+
### Technical Architecture
|
| 347 |
+
|
| 348 |
+
1. **Continuous Speech Tokenizers**: Acoustic and Semantic tokenizers at 7.5 Hz
|
| 349 |
+
2. **Next-Token Diffusion Framework**: Combines LLM understanding with diffusion generation
|
| 350 |
+
3. **Large Language Model**: Understands context and dialogue flow
|
| 351 |
+
4. **Diffusion Head**: Generates high-fidelity acoustic details
|
| 352 |
+
""")
|
| 353 |
+
|
| 354 |
+
with gr.Column():
|
| 355 |
+
gr.HTML("""
|
| 356 |
+
<div style="width: 100%; padding: 20px;">
|
| 357 |
+
<img src="https://huggingface.co/spaces/ACloudCenter/Conference-Generator-VibeVoice/resolve/main/public/images/diagram.jpg"
|
| 358 |
+
style="width: 100%; height: auto; border-radius: 10px; box-shadow: 0 5px 20px rgba(0,0,0,0.15);"
|
| 359 |
+
alt="VibeVoice Architecture Diagram">
|
| 360 |
+
</div>
|
| 361 |
+
""")
|
| 362 |
+
|
| 363 |
+
gr.Markdown("""
|
| 364 |
+
### Model Variants
|
| 365 |
+
|
| 366 |
+
**VibeVoice-1.5B**: Faster inference, suitable for real-time applications
|
| 367 |
+
**VibeVoice-7B**: Higher quality output, recommended for production use
|
| 368 |
+
|
| 369 |
+
### Performance Metrics
|
| 370 |
+
|
| 371 |
+
<img src="https://huggingface.co/spaces/ACloudCenter/Conference-Generator-VibeVoice/resolve/main/public/images/chart.png"
|
| 372 |
+
style="width: 100%; height: auto; border-radius: 10px; margin-top: 20px;"
|
| 373 |
+
alt="Performance Comparison">
|
| 374 |
+
""")
|
| 375 |
return interface
|
| 376 |
|
| 377 |
# --- Main Execution ---
|
backend_modal/modal_runner.py
CHANGED
|
@@ -29,7 +29,7 @@ image = (
|
|
| 29 |
.add_local_dir("backend_modal/modular", remote_path="/root/modular")
|
| 30 |
.add_local_dir("backend_modal/processor", remote_path="/root/processor")
|
| 31 |
.add_local_dir("backend_modal/voices", remote_path="/root/voices")
|
| 32 |
-
.add_local_dir("
|
| 33 |
.add_local_dir("backend_modal/schedule", remote_path="/root/schedule")
|
| 34 |
)
|
| 35 |
|
|
@@ -117,6 +117,59 @@ class VibeVoiceModel:
|
|
| 117 |
print(f"Error reading audio {audio_path}: {e}")
|
| 118 |
return np.array([])
|
| 119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
@modal.method()
|
| 121 |
def generate_podcast(self,
|
| 122 |
num_speakers: int,
|
|
|
|
| 29 |
.add_local_dir("backend_modal/modular", remote_path="/root/modular")
|
| 30 |
.add_local_dir("backend_modal/processor", remote_path="/root/processor")
|
| 31 |
.add_local_dir("backend_modal/voices", remote_path="/root/voices")
|
| 32 |
+
.add_local_dir("text_examples", remote_path="/root/text_examples")
|
| 33 |
.add_local_dir("backend_modal/schedule", remote_path="/root/schedule")
|
| 34 |
)
|
| 35 |
|
|
|
|
| 117 |
print(f"Error reading audio {audio_path}: {e}")
|
| 118 |
return np.array([])
|
| 119 |
|
| 120 |
+
@staticmethod
|
| 121 |
+
def _infer_num_speakers_from_script(script: str) -> int:
|
| 122 |
+
"""
|
| 123 |
+
Infer number of speakers by counting distinct 'Speaker X:' tags in the script.
|
| 124 |
+
Robust to 0- or 1-indexed labels and repeated turns.
|
| 125 |
+
Falls back to 1 if none found.
|
| 126 |
+
"""
|
| 127 |
+
import re
|
| 128 |
+
ids = re.findall(r'(?mi)^\s*Speaker\s+(\d+)\s*:', script)
|
| 129 |
+
return len({int(x) for x in ids}) if ids else 1
|
| 130 |
+
|
| 131 |
+
@modal.method()
def get_example_scripts(self):
    """Load the bundled example scripts shipped into the Modal image.

    Returns:
        Two index-aligned lists of ``[num_speakers, script_text]`` pairs:
        the original scripts and the natural-sounding variants. When a
        natural variant is missing or empty, the original script is used
        in its place so the two lists never desynchronize (the previous
        version could append to one list without the other).
    """
    examples_dir = "/root/text_examples"
    example_scripts = []
    example_scripts_natural = []
    if not os.path.exists(examples_dir):
        return [], []

    original_files = [
        "1p_ai_tedtalk.txt",
        "1p_politcal_speech.txt",
        "2p_financeipo_meeting.txt",
        "2p_telehealth_meeting.txt",
        "3p_military_meeting.txt",
        "3p_oil_meeting.txt",
        "4p_gamecreation_meeting.txt",
        "4p_product_meeting.txt"
    ]

    for txt_file in original_files:
        try:
            with open(os.path.join(examples_dir, txt_file), 'r', encoding='utf-8') as f:
                script_content = f.read().strip()
            if not script_content:
                # Empty original: skip entirely so both lists stay aligned.
                continue

            num_speakers = self._infer_num_speakers_from_script(script_content)
            example_scripts.append([num_speakers, script_content])

            # Prefer the non-empty "natural" variant; otherwise fall back
            # to the original so the parallel lists keep the same length.
            natural_path = os.path.join(examples_dir, txt_file.replace('.txt', '_natural.txt'))
            natural_content = ""
            if os.path.exists(natural_path):
                with open(natural_path, 'r', encoding='utf-8') as f:
                    natural_content = f.read().strip()
            if natural_content:
                example_scripts_natural.append(
                    [self._infer_num_speakers_from_script(natural_content), natural_content]
                )
            else:
                example_scripts_natural.append([num_speakers, script_content])
        except Exception as e:
            # Best-effort: one unreadable example must not break the rest.
            print(f"Error loading {txt_file}: {e}")

    return example_scripts, example_scripts_natural
|
| 172 |
+
|
| 173 |
@modal.method()
|
| 174 |
def generate_podcast(self,
|
| 175 |
num_speakers: int,
|