Update app.py
app.py CHANGED
@@ -15,8 +15,9 @@ import requests
 import logging
 import os
 from pydub import AudioSegment
-from pydub.silence import split_on_silence
 import speech_recognition as sr
+import torchaudio
+from speechbrain.inference.classifiers import EncoderClassifier
 nltk.download('punkt')
 nltk.download('stopwords')
 
@@ -43,6 +44,7 @@ class VideoAnalytics:
 
         self.r = sr.Recognizer()
 
+        self.language_id = EncoderClassifier.from_hparams(source="speechbrain/lang-id-voxlingua107-ecapa", savedir="tmp")
         # Initialize english text variable
         self.english_text = ""
 
@@ -84,12 +86,12 @@ class VideoAnalytics:
             raise e
 
     # Function to recognize speech in the audio file
-    def transcribe_audio(self,path):
+    def transcribe_audio(self,path: str,lang: str):
         """Transcribe speech from an audio file."""
         try:
             with sr.AudioFile(path) as source:
                 audio_listened = self.r.record(source)
-                text = self.r.recognize_google(audio_listened)
+                text = self.r.recognize_google(audio_listened,language=lang)
             return text
         except sr.UnknownValueError as e:
             logging.error(f"Speech recognition could not understand audio: {e}")
@@ -99,7 +101,7 @@ class VideoAnalytics:
             return ""
 
     # Function to split the audio file into chunks on silence and apply speech recognition
-    def get_large_audio_transcription_on_silence(self,path):
+    def get_large_audio_transcription_on_silence(self,path: str,lang: str):
         """Split the large audio file into chunks and apply speech recognition on each chunk."""
         try:
             sound = AudioSegment.from_file(path)
@@ -115,7 +117,7 @@ class VideoAnalytics:
                 chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
                 audio_chunk.export(chunk_filename, format="wav")
 
-                text = self.transcribe_audio(chunk_filename)
+                text = self.transcribe_audio(chunk_filename,lang)
 
                 if text:
                     text = f"{text.capitalize()}. "
@@ -148,8 +150,11 @@ class VideoAnalytics:
 
         # Replace 'input.mp3' and 'output.wav' with your file paths
         audio_filename = self.mp3_to_wav("output_audio.mp3", 'output.wav')
-
-
+        # for detect lang
+        signal = self.language_id.load_audio("/content/output_.wav")
+        prediction = self.language_id.classify_batch(signal)
+        lang = [prediction[3][0].split(":")][0][0]
+        text = self.get_large_audio_transcription_on_silence(audio_filename,lang)
         # Update the transcribed_text attribute with the transcription result
         self.transcribed_text = text
         # Update the translation text into english_text
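For reference, a minimal sketch of the language-identification step this commit wires in, assuming the classifier should run on the WAV produced by mp3_to_wav (the committed code loads a hard-coded "/content/output_.wav", which looks like a leftover notebook path). The model name, the EncoderClassifier API, and the "code: Name" label format returned by classify_batch come from SpeechBrain's VoxLingua107 ECAPA model; detect_language is an illustrative helper, not part of the Space.

from speechbrain.inference.classifiers import EncoderClassifier

# Download (once) and cache the VoxLingua107 spoken-language classifier.
language_id = EncoderClassifier.from_hparams(
    source="speechbrain/lang-id-voxlingua107-ecapa",
    savedir="tmp",
)

def detect_language(wav_path: str) -> str:
    """Return a bare language code such as 'en' or 'th' for the given audio file."""
    signal = language_id.load_audio(wav_path)
    # classify_batch returns (log-likelihoods, best score, predicted index, text labels);
    # a text label looks like "th: Thai", so keep only the part before the colon.
    prediction = language_id.classify_batch(signal)
    return prediction[3][0].split(":")[0]

The last line is an equivalent, more direct form of the commit's lang = [prediction[3][0].split(":")][0][0], which wraps the split result in a list before indexing back into it.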
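The other half of the change threads the detected code into Google's recognizer. Below is a sketch of that per-chunk step, assuming a module-level Recognizer and a hypothetical transcribe_chunk helper; the language parameter of recognize_google and the error handling mirror what the diff shows in transcribe_audio.

import logging

import speech_recognition as sr

recognizer = sr.Recognizer()

def transcribe_chunk(path: str, lang: str) -> str:
    """Transcribe one WAV file, returning "" when nothing can be recognized."""
    try:
        with sr.AudioFile(path) as source:
            audio = recognizer.record(source)
        # Passing the detected code (e.g. "th") steers the Google Web Speech API
        # toward the right language instead of its en-US default.
        return recognizer.recognize_google(audio, language=lang)
    except sr.UnknownValueError as e:
        logging.error(f"Speech recognition could not understand audio: {e}")
        return ""
    except sr.RequestError as e:
        logging.error(f"Could not reach the Google Web Speech API: {e}")
        return ""

Chained with the sketch above, lang = detect_language(audio_filename) followed by transcribe_chunk(chunk_path, lang) for each exported chunk roughly reproduces the flow the commit adds at the end of the last hunk.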