Update app.py
app.py CHANGED
@@ -15,8 +15,9 @@ import requests
 import logging
 import os
 from pydub import AudioSegment
-from pydub.silence import split_on_silence
 import speech_recognition as sr
+import torchaudio
+from speechbrain.inference.classifiers import EncoderClassifier
 nltk.download('punkt')
 nltk.download('stopwords')
 
@@ -43,6 +44,7 @@ class VideoAnalytics:
 
         self.r = sr.Recognizer()
 
+        self.language_id = EncoderClassifier.from_hparams(source="speechbrain/lang-id-voxlingua107-ecapa", savedir="tmp")
         # Initialize english text variable
         self.english_text = ""
 
@@ -84,12 +86,12 @@ class VideoAnalytics:
             raise e
 
     # Function to recognize speech in the audio file
-    def transcribe_audio(self,path):
+    def transcribe_audio(self,path: str,lang: str):
         """Transcribe speech from an audio file."""
         try:
             with sr.AudioFile(path) as source:
                 audio_listened = self.r.record(source)
-                text = self.r.recognize_google(audio_listened)
+                text = self.r.recognize_google(audio_listened,language=lang)
             return text
         except sr.UnknownValueError as e:
             logging.error(f"Speech recognition could not understand audio: {e}")
@@ -99,7 +101,7 @@ class VideoAnalytics:
             return ""
 
     # Function to split the audio file into chunks on silence and apply speech recognition
-    def get_large_audio_transcription_on_silence(self,path):
+    def get_large_audio_transcription_on_silence(self,path: str,lang: str):
         """Split the large audio file into chunks and apply speech recognition on each chunk."""
         try:
             sound = AudioSegment.from_file(path)
@@ -115,7 +117,7 @@ class VideoAnalytics:
                 chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
                 audio_chunk.export(chunk_filename, format="wav")
 
-                text = self.transcribe_audio(chunk_filename)
+                text = self.transcribe_audio(chunk_filename,lang)
 
                 if text:
                     text = f"{text.capitalize()}. "
@@ -148,8 +150,11 @@ class VideoAnalytics:
 
         # Replace 'input.mp3' and 'output.wav' with your file paths
         audio_filename = self.mp3_to_wav("output_audio.mp3", 'output.wav')
-
-
+        # for detect lang
+        signal = self.language_id.load_audio("/content/output_.wav")
+        prediction = self.language_id.classify_batch(signal)
+        lang = [prediction[3][0].split(":")][0][0]
+        text = self.get_large_audio_transcription_on_silence(audio_filename,lang)
         # Update the transcribed_text attribute with the transcription result
         self.transcribed_text = text
         # Update the translation text into english_text
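For reference, a minimal sketch of the language-identification step this commit wires in, assuming the classifier should run on the WAV produced by mp3_to_wav (the committed code loads a hard-coded "/content/output_.wav", which looks like a leftover notebook path). The model name, the EncoderClassifier API, and the "code: Name" label format returned by classify_batch come from SpeechBrain's VoxLingua107 ECAPA model; detect_language is an illustrative helper, not part of the Space.

from speechbrain.inference.classifiers import EncoderClassifier

# Download (once) and cache the VoxLingua107 spoken-language classifier.
language_id = EncoderClassifier.from_hparams(
    source="speechbrain/lang-id-voxlingua107-ecapa",
    savedir="tmp",
)

def detect_language(wav_path: str) -> str:
    """Return a bare language code such as 'en' or 'th' for the given audio file."""
    signal = language_id.load_audio(wav_path)
    # classify_batch returns (log-likelihoods, best score, predicted index, text labels);
    # a text label looks like "th: Thai", so keep only the part before the colon.
    prediction = language_id.classify_batch(signal)
    return prediction[3][0].split(":")[0]

The last line is an equivalent, more direct form of the commit's lang = [prediction[3][0].split(":")][0][0], which wraps the split result in a list before indexing back into it.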
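The other half of the change threads the detected code into Google's recognizer. Below is a sketch of that per-chunk step, assuming a module-level Recognizer and a hypothetical transcribe_chunk helper; the language parameter of recognize_google and the error handling mirror what the diff shows in transcribe_audio.

import logging

import speech_recognition as sr

recognizer = sr.Recognizer()

def transcribe_chunk(path: str, lang: str) -> str:
    """Transcribe one WAV file, returning "" when nothing can be recognized."""
    try:
        with sr.AudioFile(path) as source:
            audio = recognizer.record(source)
        # Passing the detected code (e.g. "th") steers the Google Web Speech API
        # toward the right language instead of its en-US default.
        return recognizer.recognize_google(audio, language=lang)
    except sr.UnknownValueError as e:
        logging.error(f"Speech recognition could not understand audio: {e}")
        return ""
    except sr.RequestError as e:
        logging.error(f"Could not reach the Google Web Speech API: {e}")
        return ""

Chained with the sketch above, lang = detect_language(audio_filename) followed by transcribe_chunk(chunk_path, lang) for each exported chunk roughly reproduces the flow the commit adds at the end of the last hunk.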