malek-messaoudii committed
Commit 691fd14 · 1 Parent(s): 56dc677

Refactor requirements and chatbot services; enhance service initialization and error handling for TTS and STT functionalities

requirements.txt CHANGED
@@ -1,22 +1,24 @@
-# System dependencies (install ffmpeg)
---find-links https://wheel-index.linuxserver.io/ubuntu/
-ffmpeg
-
-# Python dependencies
 fastapi==0.104.1
 uvicorn[standard]==0.24.0
-pydantic==2.5.0
+python-multipart==0.0.6
 python-dotenv==1.0.0
-torch>=2.0.0
-transformers>=4.35.0
-protobuf>=3.20.0
-huggingface_hub>=0.19.0
-python-multipart
-google-genai>=0.4.0
-requests==2.31.0
-soundfile==0.12.1
 gtts==2.3.2
+pydantic==2.5.0
+
+# Optional - comment out if causing issues
 SpeechRecognition==3.10.0
 pyttsx3==2.90
-accelerate>=0.20.0
-coqui-tts==0.21.0
+
+# NLP models (optional)
+transformers==4.35.0
+torch==2.0.1
+accelerate==0.20.0
+huggingface_hub==0.19.0
+protobuf==3.20.0
+
+# Use TTS instead of coqui-tts
+TTS==0.22.1
+
+# Audio processing
+soundfile==0.12.1
+requests==2.31.0
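
The updated requirements split the install into required packages and optional groups ("Optional - comment out if causing issues", "NLP models (optional)"), and the services below are written to degrade gracefully when an optional group is missing. A minimal startup-check sketch that mirrors that grouping; the script itself is hypothetical and not part of this commit, only the package names come from the file above:

# check_optional_deps.py - hypothetical helper, not part of this commit.
# Reports which optional dependency groups from requirements.txt import cleanly.
import importlib

OPTIONAL_GROUPS = {
    "speech": ["speech_recognition", "pyttsx3"],   # SpeechRecognition / pyttsx3
    "nlp": ["transformers", "torch", "accelerate"],  # NLP models (optional)
    "tts": ["TTS"],                                  # TTS package replacing coqui-tts
    "audio": ["soundfile"],                          # Audio processing
}

def check_groups() -> dict:
    """Try-import each optional module and collect the missing ones per group."""
    status = {}
    for group, modules in OPTIONAL_GROUPS.items():
        missing = []
        for name in modules:
            try:
                importlib.import_module(name)
            except ImportError:
                missing.append(name)
        status[group] = missing
    return status

if __name__ == "__main__":
    for group, missing in check_groups().items():
        mark = "✓ available" if not missing else f"⚠️ missing: {', '.join(missing)}"
        print(f"{group}: {mark}")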
services/chatbot_service.py CHANGED
@@ -3,32 +3,47 @@ import uuid
 from typing import Optional, Dict, Any
 from datetime import datetime
 from models.audio import ChatbotResponse, UserMessage
-from services.tts_service import SimpleTTSService  # Use simple version
-from services.stt_service import STTService  # Use basic version
 
 class ChatbotService:
     def __init__(self):
-        self.tts_service = SimpleTTSService()  # Use simple TTS
-        self.stt_service = STTService()  # Use basic STT
         self.sessions: Dict[str, Dict[str, Any]] = {}
         self.initialized = False
+        self._initialize_services()
+
+    def _initialize_services(self):
+        """Initialize services"""
+        try:
+            from services.tts_service import SimpleTTSService
+            self.tts_service = SimpleTTSService()
+            print("✓ TTS service initialized")
+        except ImportError as e:
+            print(f"⚠️ TTS service not available: {e}")
+            self.tts_service = None
+
+        try:
+            from services.stt_service import STTService
+            self.stt_service = STTService()
+            print("✓ STT service initialized")
+        except ImportError as e:
+            print(f"⚠️ STT service not available: {e}")
+            self.stt_service = None
 
     async def initialize(self):
        """Initialize the chatbot service"""
-        await self.stt_service.initialize()
+        if self.stt_service:
+            await self.stt_service.initialize()
         self.initialized = True
-        print("✓ Chatbot Service initialized")
+        print("✓ Chatbot Service fully initialized")
 
     async def process_user_message(self, user_message: UserMessage) -> ChatbotResponse:
         # Update session
         session = self._get_or_create_session(user_message.session_id)
 
         # Process message based on type
-        if user_message.message_type == "audio":
-            # STT: Convert audio to text
-            text_input = await self.stt_service.transcribe_audio_base64(
-                user_message.content
-            )
+        if user_message.message_type == "audio" and self.stt_service:
+            text_input = await self.stt_service.transcribe_audio_base64(user_message.content)
+        elif user_message.message_type == "audio":
+            text_input = "[Voice message received]"
         else:
             text_input = user_message.content
 
@@ -43,11 +58,9 @@ class ChatbotService:
         chatbot_text = await self._generate_chatbot_response(text_input, session)
 
         # TTS: Convert response to audio
-        try:
+        audio_base64 = None
+        if self.tts_service:
             audio_base64 = await self.tts_service.text_to_speech_base64(chatbot_text)
-        except Exception as e:
-            print(f"TTS error: {e}")
-            audio_base64 = None
 
         # Create response
         response = ChatbotResponse(
@@ -69,19 +82,24 @@ class ChatbotService:
 
     async def _generate_chatbot_response(self, user_input: str, session: Dict[str, Any]) -> str:
         """Chatbot response generation logic"""
-        # Simple response logic - replace with your actual chatbot model
         user_input_lower = user_input.lower()
 
-        if "hello" in user_input_lower or "hi" in user_input_lower:
-            return "Hello! How can I assist you today?"
-        elif "time" in user_input_lower:
+        if any(greet in user_input_lower for greet in ["hello", "hi", "hey"]):
+            return "Hello! I'm your voice assistant. How can I help you today?"
+
+        if "time" in user_input_lower:
             return f"The current time is {datetime.now().strftime('%H:%M')}"
-        elif "help" in user_input_lower:
-            return "I'm here to help you. You can ask me questions or request assistance."
-        elif "audio" in user_input_lower or "voice" in user_input_lower:
-            return "I can process both text and voice messages. Try sending me a voice message!"
-        else:
-            return f"I received your message: '{user_input}'. How can I assist you further?"
+
+        if "help" in user_input_lower:
+            return "I can process both text and voice messages. Try sending me a voice note!"
+
+        if "name" in user_input_lower:
+            return "I'm your AI voice assistant. I'm here to help with your questions!"
+
+        if "voice" in user_input_lower or "audio" in user_input_lower:
+            return "Yes! I support voice messages. You can speak to me and I'll respond with audio too!"
+
+        return f"I understand you said: '{user_input}'. How can I assist you further?"
 
     def _get_or_create_session(self, session_id: str) -> Dict[str, Any]:
         if session_id not in self.sessions:
@@ -90,9 +108,6 @@ class ChatbotService:
             "created_at": datetime.now(),
             "last_activity": datetime.now()
         }
-        else:
-            self.sessions[session_id]["last_activity"] = datetime.now()
-
         return self.sessions[session_id]
 
     def get_session_history(self, session_id: str) -> Optional[Dict[str, Any]]:
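
For context, a short driver sketch for the refactored ChatbotService. It assumes the project layout shown in this commit (services/ and models/ importable from the project root) and that models.audio.UserMessage accepts the content, message_type, and session_id fields the diff reads from it; constructing it with keyword arguments is an assumption, since the model definition is not part of this diff:

# Hypothetical driver script, not part of this commit.
import asyncio

from models.audio import UserMessage
from services.chatbot_service import ChatbotService

async def main():
    chatbot = ChatbotService()   # __init__ already try-imports the TTS/STT backends
    await chatbot.initialize()   # STT backend initialization is skipped if unavailable

    message = UserMessage(
        content="hello there",
        message_type="text",     # "audio" would route through STTService instead
        session_id="demo-session",
    )
    response = await chatbot.process_user_message(message)
    # ChatbotResponse fields are defined in models.audio; print the whole object here.
    print(response)

if __name__ == "__main__":
    asyncio.run(main())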
services/tts_service.py CHANGED
@@ -1,121 +1,48 @@
 import base64
-import io
 import tempfile
 import os
-from gtts import gTTS
-import pyttsx3
 
-class TTSService:
+class STTService:
     def __init__(self):
-        self.models = {}
-        self._initialize_models()
+        self.initialized = False
+        self.recognizer = None
 
-    def _initialize_models(self):
-        """Initialize TTS models"""
-        # gTTS is our primary method (always available)
-        self.models["gtts"] = True
-
-        # Try to initialize pyttsx3 as fallback
-        try:
-            self.models["pyttsx3"] = pyttsx3.init()
-            print("✓ pyttsx3 TTS initialized")
-        except:
-            print("⚠️ pyttsx3 not available")
-            self.models["pyttsx3"] = None
-
-        # Coqui TTS is optional
-        self.models["coqui"] = self._initialize_coqui_tts()
-
-    def _initialize_coqui_tts(self):
-        """Initialize Coqui TTS if available"""
+    async def initialize(self):
+        """Initialize STT service"""
         try:
-            from TTS.api import TTS
-            tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
-            print("✓ Coqui TTS initialized")
-            return tts_model
+            import speech_recognition as sr
+            self.recognizer = sr.Recognizer()
+            self.initialized = True
+            print("✓ STT Service initialized (SpeechRecognition)")
         except ImportError:
-            print("⚠️ Coqui TTS not available. Install with: pip install TTS")
-            return None
-        except Exception as e:
-            print(f"⚠️ Coqui TTS initialization failed: {e}")
-            return None
+            print("⚠️ SpeechRecognition not available. STT will return placeholder text.")
+            self.initialized = False
 
-    async def text_to_speech_base64(self, text: str, language: str = "en") -> str:
-        """Convert text to base64 audio"""
-        # Try gTTS first (most reliable and free)
-        try:
-            return await self._gtts_to_base64(text, language)
-        except Exception as e:
-            print(f"gTTS error: {e}")
-
-        # Fallback to pyttsx3
-        try:
-            if self.models.get("pyttsx3"):
-                return await self._pyttsx3_to_base64(text)
-        except Exception as e:
-            print(f"pyttsx3 error: {e}")
-
-        # Final fallback to Coqui TTS
-        try:
-            if self.models.get("coqui"):
-                return await self._coqui_to_base64(text)
-        except Exception as e:
-            print(f"Coqui TTS error: {e}")
+    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
+        """Transcribe base64 audio to text"""
+        if not self.initialized or not self.recognizer:
+            return "[Audio received - install SpeechRecognition for transcription]"
 
-        raise Exception("All TTS services failed")
-
-    async def _gtts_to_base64(self, text: str, language: str) -> str:
-        """Convert using gTTS"""
-        tts = gTTS(text=text, lang=language, slow=False)
-        audio_buffer = io.BytesIO()
-        tts.write_to_fp(audio_buffer)
-        audio_buffer.seek(0)
-        return base64.b64encode(audio_buffer.getvalue()).decode('utf-8')
-
-    async def _pyttsx3_to_base64(self, text: str) -> str:
-        """Convert using pyttsx3"""
-        engine = self.models["pyttsx3"]
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-            temp_path = temp_file.name
-
-        engine.save_to_file(text, temp_path)
-        engine.runAndWait()
-
-        with open(temp_path, 'rb') as audio_file:
-            audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')
-
-        # Cleanup
-        os.unlink(temp_path)
-        return audio_base64
-
-    async def _coqui_to_base64(self, text: str) -> str:
-        """Convert using Coqui TTS"""
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-            temp_path = temp_file.name
-
-        self.models["coqui"].tts_to_file(text=text, file_path=temp_path)
-
-        with open(temp_path, 'rb') as audio_file:
-            audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')
-
-        # Cleanup
-        os.unlink(temp_path)
-        return audio_base64
-
-# Simple TTS service that only uses gTTS (minimal dependencies)
-class SimpleTTSService:
-    def __init__(self):
-        pass
-
-    async def text_to_speech_base64(self, text: str, language: str = "en") -> str:
-        """Convert text to base64 audio using only gTTS"""
         try:
-            tts = gTTS(text=text, lang=language, slow=False)
-            audio_buffer = io.BytesIO()
-            tts.write_to_fp(audio_buffer)
-            audio_buffer.seek(0)
-            return base64.b64encode(audio_buffer.getvalue()).decode('utf-8')
+            import speech_recognition as sr
+            import io
+
+            # Decode audio
+            audio_data = base64.b64decode(audio_base64)
+
+            # Use SpeechRecognition
+            audio_file = sr.AudioFile(io.BytesIO(audio_data))
+
+            with audio_file as source:
+                # Adjust for ambient noise
+                self.recognizer.adjust_for_ambient_noise(source)
+                audio = self.recognizer.record(source)
+
+            return self.recognizer.recognize_google(audio, language=language)
+
+        except sr.UnknownValueError:
+            return "Could not understand audio"
+        except sr.RequestError as e:
+            return f"Error with speech recognition service: {e}"
         except Exception as e:
-            print(f"gTTS error: {e}")
-            # Return a placeholder audio or error message
-            return "TTS_ERROR_PLACEHOLDER"
+            return f"Error processing audio: {str(e)}"