Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

App Files Files Community

malek-messaoudii committed on 25 days ago

Commit

691fd14

1 Parent(s): 56dc677

Refactor requirements and chatbot services; enhance service initialization and error handling for TTS and STT functionalities

Browse files

Files changed (3) hide show

requirements.txt +18 -16
services/chatbot_service.py +43 -28
services/tts_service.py +36 -109

requirements.txt CHANGED Viewed

@@ -1,22 +1,24 @@
-# System dependencies (install ffmpeg)
---find-links https://wheel-index.linuxserver.io/ubuntu/
-ffmpeg
-# Python dependencies
 fastapi==0.104.1
 uvicorn[standard]==0.24.0
-pydantic==2.5.0
 python-dotenv==1.0.0
-torch>=2.0.0
-transformers>=4.35.0
-protobuf>=3.20.0
-huggingface_hub>=0.19.0
-python-multipart
-google-genai>=0.4.0
-requests==2.31.0
-soundfile==0.12.1
 gtts==2.3.2
 SpeechRecognition==3.10.0
 pyttsx3==2.90
-accelerate>=0.20.0
-coqui-tts==0.21.0

 fastapi==0.104.1
 uvicorn[standard]==0.24.0
+python-multipart==0.0.6
 python-dotenv==1.0.0
 gtts==2.3.2
+pydantic==2.5.0
+# Optional - comment out if causing issues
 SpeechRecognition==3.10.0
 pyttsx3==2.90
+# NLP models (optional)
+transformers==4.35.0
+torch==2.0.1
+accelerate==0.20.0
+huggingface_hub==0.19.0
+protobuf==3.20.0
+# Use TTS instead of coqui-tts
+TTS==0.22.1
+# Audio processing
+soundfile==0.12.1
+requests==2.31.0

services/chatbot_service.py CHANGED Viewed

@@ -3,32 +3,47 @@ import uuid
 from typing import Optional, Dict, Any
 from datetime import datetime
 from models.audio import ChatbotResponse, UserMessage
-from services.tts_service import SimpleTTSService  # Use simple version
-from services.stt_service import STTService  # Use basic version
 class ChatbotService:
     def __init__(self):
-        self.tts_service = SimpleTTSService()  # Use simple TTS
-        self.stt_service = STTService()        # Use basic STT
         self.sessions: Dict[str, Dict[str, Any]] = {}
         self.initialized = False
     async def initialize(self):
         """Initialize the chatbot service"""
-        await self.stt_service.initialize()
         self.initialized = True
-        print("✓ Chatbot Service initialized")
     async def process_user_message(self, user_message: UserMessage) -> ChatbotResponse:
         # Update session
         session = self._get_or_create_session(user_message.session_id)
         # Process message based on type
-        if user_message.message_type == "audio":
-            # STT: Convert audio to text
-            text_input = await self.stt_service.transcribe_audio_base64(
-                user_message.content
-            )
         else:
             text_input = user_message.content
@@ -43,11 +58,9 @@ class ChatbotService:
         chatbot_text = await self._generate_chatbot_response(text_input, session)
         # TTS: Convert response to audio
-        try:
             audio_base64 = await self.tts_service.text_to_speech_base64(chatbot_text)
-        except Exception as e:
-            print(f"TTS error: {e}")
-            audio_base64 = None
         # Create response
         response = ChatbotResponse(
@@ -69,19 +82,24 @@ class ChatbotService:
     async def _generate_chatbot_response(self, user_input: str, session: Dict[str, Any]) -> str:
         """Chatbot response generation logic"""
-        # Simple response logic - replace with your actual chatbot model
         user_input_lower = user_input.lower()
-        if "hello" in user_input_lower or "hi" in user_input_lower:
-            return "Hello! How can I assist you today?"
-        elif "time" in user_input_lower:
             return f"The current time is {datetime.now().strftime('%H:%M')}"
-        elif "help" in user_input_lower:
-            return "I'm here to help you. You can ask me questions or request assistance."
-        elif "audio" in user_input_lower or "voice" in user_input_lower:
-            return "I can process both text and voice messages. Try sending me a voice message!"
-        else:
-            return f"I received your message: '{user_input}'. How can I assist you further?"
     def _get_or_create_session(self, session_id: str) -> Dict[str, Any]:
         if session_id not in self.sessions:
@@ -90,9 +108,6 @@ class ChatbotService:
                 "created_at": datetime.now(),
                 "last_activity": datetime.now()
             }
-        else:
-            self.sessions[session_id]["last_activity"] = datetime.now()
         return self.sessions[session_id]
     def get_session_history(self, session_id: str) -> Optional[Dict[str, Any]]:

 from typing import Optional, Dict, Any
 from datetime import datetime
 from models.audio import ChatbotResponse, UserMessage
 class ChatbotService:
     def __init__(self):
         self.sessions: Dict[str, Dict[str, Any]] = {}
         self.initialized = False
+        self._initialize_services()
+    def _initialize_services(self):
+        """Initialize services"""
+        try:
+            from services.tts_service import SimpleTTSService
+            self.tts_service = SimpleTTSService()
+            print("✓ TTS service initialized")
+        except ImportError as e:
+            print(f"⚠️ TTS service not available: {e}")
+            self.tts_service = None
+        try:
+            from services.stt_service import STTService
+            self.stt_service = STTService()
+            print("✓ STT service initialized")
+        except ImportError as e:
+            print(f"⚠️ STT service not available: {e}")
+            self.stt_service = None
     async def initialize(self):
         """Initialize the chatbot service"""
+        if self.stt_service:
+            await self.stt_service.initialize()
         self.initialized = True
+        print("✓ Chatbot Service fully initialized")
     async def process_user_message(self, user_message: UserMessage) -> ChatbotResponse:
         # Update session
         session = self._get_or_create_session(user_message.session_id)
         # Process message based on type
+        if user_message.message_type == "audio" and self.stt_service:
+            text_input = await self.stt_service.transcribe_audio_base64(user_message.content)
+        elif user_message.message_type == "audio":
+            text_input = "[Voice message received]"
         else:
             text_input = user_message.content
         chatbot_text = await self._generate_chatbot_response(text_input, session)
         # TTS: Convert response to audio
+        audio_base64 = None
+        if self.tts_service:
             audio_base64 = await self.tts_service.text_to_speech_base64(chatbot_text)
         # Create response
         response = ChatbotResponse(
     async def _generate_chatbot_response(self, user_input: str, session: Dict[str, Any]) -> str:
         """Chatbot response generation logic"""
         user_input_lower = user_input.lower()
+        if any(greet in user_input_lower for greet in ["hello", "hi", "hey"]):
+            return "Hello! I'm your voice assistant. How can I help you today?"
+        if "time" in user_input_lower:
             return f"The current time is {datetime.now().strftime('%H:%M')}"
+        if "help" in user_input_lower:
+            return "I can process both text and voice messages. Try sending me a voice note!"
+        if "name" in user_input_lower:
+            return "I'm your AI voice assistant. I'm here to help with your questions!"
+        if "voice" in user_input_lower or "audio" in user_input_lower:
+            return "Yes! I support voice messages. You can speak to me and I'll respond with audio too!"
+        return f"I understand you said: '{user_input}'. How can I assist you further?"
     def _get_or_create_session(self, session_id: str) -> Dict[str, Any]:
         if session_id not in self.sessions:
                 "created_at": datetime.now(),
                 "last_activity": datetime.now()
             }
         return self.sessions[session_id]
     def get_session_history(self, session_id: str) -> Optional[Dict[str, Any]]:

services/tts_service.py CHANGED Viewed

@@ -1,121 +1,48 @@
 import base64
-import io
 import tempfile
 import os
-from gtts import gTTS
-import pyttsx3
-class TTSService:
     def __init__(self):
-        self.models = {}
-        self._initialize_models()
-    def _initialize_models(self):
-        """Initialize TTS models"""
-        # gTTS is our primary method (always available)
-        self.models["gtts"] = True
-        # Try to initialize pyttsx3 as fallback
-        try:
-            self.models["pyttsx3"] = pyttsx3.init()
-            print("✓ pyttsx3 TTS initialized")
-        except:
-            print("⚠️ pyttsx3 not available")
-            self.models["pyttsx3"] = None
-        # Coqui TTS is optional
-        self.models["coqui"] = self._initialize_coqui_tts()
-    def _initialize_coqui_tts(self):
-        """Initialize Coqui TTS if available"""
         try:
-            from TTS.api import TTS
-            tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
-            print("✓ Coqui TTS initialized")
-            return tts_model
         except ImportError:
-            print("⚠️ Coqui TTS not available. Install with: pip install TTS")
-            return None
-        except Exception as e:
-            print(f"⚠️ Coqui TTS initialization failed: {e}")
-            return None
-    async def text_to_speech_base64(self, text: str, language: str = "en") -> str:
-        """Convert text to base64 audio"""
-        # Try gTTS first (most reliable and free)
-        try:
-            return await self._gtts_to_base64(text, language)
-        except Exception as e:
-            print(f"gTTS error: {e}")
-        # Fallback to pyttsx3
-        try:
-            if self.models.get("pyttsx3"):
-                return await self._pyttsx3_to_base64(text)
-        except Exception as e:
-            print(f"pyttsx3 error: {e}")
-        # Final fallback to Coqui TTS
-        try:
-            if self.models.get("coqui"):
-                return await self._coqui_to_base64(text)
-        except Exception as e:
-            print(f"Coqui TTS error: {e}")
-        raise Exception("All TTS services failed")
-    async def _gtts_to_base64(self, text: str, language: str) -> str:
-        """Convert using gTTS"""
-        tts = gTTS(text=text, lang=language, slow=False)
-        audio_buffer = io.BytesIO()
-        tts.write_to_fp(audio_buffer)
-        audio_buffer.seek(0)
-        return base64.b64encode(audio_buffer.getvalue()).decode('utf-8')
-    async def _pyttsx3_to_base64(self, text: str) -> str:
-        """Convert using pyttsx3"""
-        engine = self.models["pyttsx3"]
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-            temp_path = temp_file.name
-        engine.save_to_file(text, temp_path)
-        engine.runAndWait()
-        with open(temp_path, 'rb') as audio_file:
-            audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')
-        # Cleanup
-        os.unlink(temp_path)
-        return audio_base64
-    async def _coqui_to_base64(self, text: str) -> str:
-        """Convert using Coqui TTS"""
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-            temp_path = temp_file.name
-        self.models["coqui"].tts_to_file(text=text, file_path=temp_path)
-        with open(temp_path, 'rb') as audio_file:
-            audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')
-        # Cleanup
-        os.unlink(temp_path)
-        return audio_base64
-# Simple TTS service that only uses gTTS (minimal dependencies)
-class SimpleTTSService:
-    def __init__(self):
-        pass
-    async def text_to_speech_base64(self, text: str, language: str = "en") -> str:
-        """Convert text to base64 audio using only gTTS"""
         try:
-            tts = gTTS(text=text, lang=language, slow=False)
-            audio_buffer = io.BytesIO()
-            tts.write_to_fp(audio_buffer)
-            audio_buffer.seek(0)
-            return base64.b64encode(audio_buffer.getvalue()).decode('utf-8')
         except Exception as e:
-            print(f"gTTS error: {e}")
-            # Return a placeholder audio or error message
-            return "TTS_ERROR_PLACEHOLDER"

 import base64
 import tempfile
 import os
+class STTService:
     def __init__(self):
+        self.initialized = False
+        self.recognizer = None
+    async def initialize(self):
+        """Initialize STT service"""
         try:
+            import speech_recognition as sr
+            self.recognizer = sr.Recognizer()
+            self.initialized = True
+            print("✓ STT Service initialized (SpeechRecognition)")
         except ImportError:
+            print("⚠️ SpeechRecognition not available. STT will return placeholder text.")
+            self.initialized = False
+    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
+        """Transcribe base64 audio to text"""
+        if not self.initialized or not self.recognizer:
+            return "[Audio received - install SpeechRecognition for transcription]"
         try:
+            import speech_recognition as sr
+            import io
+            # Decode audio
+            audio_data = base64.b64decode(audio_base64)
+            # Use SpeechRecognition
+            audio_file = sr.AudioFile(io.BytesIO(audio_data))
+            with audio_file as source:
+                # Adjust for ambient noise
+                self.recognizer.adjust_for_ambient_noise(source)
+                audio = self.recognizer.record(source)
+            return self.recognizer.recognize_google(audio, language=language)
+        except sr.UnknownValueError:
+            return "Could not understand audio"
+        except sr.RequestError as e:
+            return f"Error with speech recognition service: {e}"
         except Exception as e:
+            return f"Error processing audio: {str(e)}"