import gradio as gr
import os
import tempfile
from pathlib import Path

from diarization_pyannote_demo import run_pyannote_diarization, write_rttm, write_json
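
# Assumed interface of the helpers imported above, based on how they are used in
# this file: run_pyannote_diarization(...) returns a dict with "segments" (a list
# of {"speaker", "start", "end"} dicts), "num_speakers" and "duration";
# write_rttm(segments, path, uri) and write_json(segments, path) serialize them.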


def diarize_audio(audio_file, model_name, num_speakers, min_speakers, max_speakers, use_exclusive):
    """Gradio interface for pyannote speaker diarization."""
    if audio_file is None:
        return None, None, "❌ Please upload an audio file"

    try:
        # Gradio can hand the audio back in several forms depending on the
        # component type and version; normalize it to a filesystem path.
        if isinstance(audio_file, tuple):
            audio_path = audio_file[1] if len(audio_file) > 1 else audio_file[0]
        elif isinstance(audio_file, str):
            audio_path = audio_file
        elif hasattr(audio_file, 'name'):
            audio_path = audio_file.name
        else:
            audio_path = str(audio_file)

        if not os.path.exists(audio_path):
            return None, None, f"❌ Audio file not found: {audio_path}"

        # Write outputs to a persistent temp directory: a TemporaryDirectory
        # context manager would delete the files as soon as this function
        # returns, before Gradio could serve the RTTM/JSON downloads.
        tmpdir = tempfile.mkdtemp(prefix="gilbert_diarization_")

        result = run_pyannote_diarization(
            audio_path,
            output_dir=tmpdir,
            model_name=model_name,
            # gr.Number returns floats; cast to int since speaker counts are
            # integral, and treat 0 or an empty field as "not constrained".
            num_speakers=int(num_speakers) if num_speakers and num_speakers > 0 else None,
            min_speakers=int(min_speakers) if min_speakers and min_speakers > 0 else None,
            max_speakers=int(max_speakers) if max_speakers and max_speakers > 0 else None,
            use_exclusive=use_exclusive,
            show_progress=False
        )

        audio_name = Path(audio_path).stem
        rttm_path = os.path.join(tmpdir, f"{audio_name}.rttm")
        json_path = os.path.join(tmpdir, f"{audio_name}.json")

        write_rttm(result["segments"], rttm_path, audio_name)
        write_json(result["segments"], json_path)
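
        # RTTM is the standard diarization exchange format; assuming write_rttm
        # follows the usual layout, each line looks like:
        #   SPEAKER <uri> 1 <start> <duration> <NA> <NA> <speaker> <NA> <NA>
        # The JSON file mirrors result["segments"] for easier programmatic use.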

        summary = f"""
# Diarization results

**File:** {Path(audio_path).name}
**Model:** {model_name}
**Detected speakers:** {result['num_speakers']}
**Segments:** {len(result['segments'])}
**Total duration:** {result.get('duration', 0):.2f} seconds

## Per-speaker statistics
"""
        from collections import defaultdict
        speaker_stats = defaultdict(lambda: {"total_duration": 0.0, "num_segments": 0})
        for seg in result["segments"]:
            speaker = seg["speaker"]
            duration = seg["end"] - seg["start"]
            speaker_stats[speaker]["total_duration"] += duration
            speaker_stats[speaker]["num_segments"] += 1

        for speaker, stats in sorted(speaker_stats.items()):
            avg_duration = stats["total_duration"] / stats["num_segments"] if stats["num_segments"] > 0 else 0
            summary += f"\n- **{speaker}**: {stats['num_segments']} segments, {stats['total_duration']:.2f}s total, {avg_duration:.2f}s average per segment"

        return rttm_path, json_path, summary

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        error_msg = f"""❌ **Error during diarization**

**Message:** {str(e)}

**Technical details:**
```
{error_details}
```

**Possible fixes:**
- Check that the audio file is valid
- Make sure the HF_TOKEN is configured in the Space secrets
- Try again with a shorter audio file
"""
        return None, None, error_msg
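

# Example of calling diarize_audio directly (outside Gradio) for a quick manual
# test, assuming a local file sample.wav and auto-detection of speaker counts:
#   rttm_path, json_path, summary = diarize_audio(
#       "sample.wav", "pyannote/speaker-diarization-3.1", 0, 0, 0, False)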


with gr.Blocks(title="Gilbert - pyannote diarization") as demo:
    gr.Markdown("""
# 🎤 Gilbert - pyannote diarization

Interface for speaker diarization with pyannote.audio

**Instructions:**
1. Upload an audio file (WAV, MP3, M4A)
2. Configure the parameters (optional)
3. Click "Diarize"
4. Download the results (RTTM and JSON)
""")

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                label="Audio file",
                type="filepath"
            )

            model_name = gr.Dropdown(
                choices=[
                    "pyannote/speaker-diarization-3.1",
                    "pyannote/speaker-diarization-community-1",
                ],
                value="pyannote/speaker-diarization-3.1",
                label="pyannote model"
            )
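
            # Note: both pipelines are gated on the Hugging Face Hub, so the
            # token in HF_TOKEN must belong to an account that has accepted
            # the pyannote model user conditions.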

            with gr.Row():
                num_speakers = gr.Number(
                    label="Exact number of speakers",
                    value=0,
                    minimum=0,
                    info="0 = auto-detect"
                )
                min_speakers = gr.Number(
                    label="Min speakers",
                    value=0,
                    minimum=0,
                    info="0 = no limit"
                )
                max_speakers = gr.Number(
                    label="Max speakers",
                    value=0,
                    minimum=0,
                    info="0 = no limit"
                )

            use_exclusive = gr.Checkbox(
                label="Exclusive speaker diarization",
                value=False,
                info="Simplifies reconciliation with a transcript"
            )
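
            # "Exclusive" diarization (presumably forwarded to the pipeline by
            # run_pyannote_diarization) assigns each time frame to at most one
            # speaker, i.e. it removes overlapping speech, which makes aligning
            # segments with a word-level transcript simpler.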

            diarize_btn = gr.Button("🎯 Diarize", variant="primary")

        with gr.Column():
            summary_output = gr.Markdown(label="Summary")
            rttm_output = gr.File(label="RTTM file", type="filepath")
            json_output = gr.File(label="JSON file", type="filepath")

    diarize_btn.click(
        fn=diarize_audio,
        inputs=[audio_input, model_name, num_speakers, min_speakers, max_speakers, use_exclusive],
        outputs=[rttm_output, json_output, summary_output]
    )

    gr.Markdown("""
---
**Note:** You need a Hugging Face token configured with access to the pyannote models.
Set it with: `export HF_TOKEN="your_token"`
""")


if __name__ == "__main__":
    demo.launch()
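
# Typical local run (assuming this file is saved as app.py, the usual name on
# Hugging Face Spaces):
#   export HF_TOKEN="hf_..."   # token with access to the gated pyannote models
#   python app.py              # Gradio serves the UI on http://localhost:7860 by default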