harismlnaslm committed on
Commit
b35c210
·
1 Parent(s): 77cde9a

Fix 404 error: Switch from Llama 3.1 8B to DialoGPT-medium (available on HF API)

Browse files
Files changed (1) hide show
  1. app.py +25 -37
app.py CHANGED
@@ -332,8 +332,8 @@ class TextilindoAI:
332
 
333
  def __init__(self):
334
  self.api_key = os.getenv('HUGGINGFAC_API_KEY_2')
335
- # Use Meta Llama model with your API key
336
- self.model = os.getenv('DEFAULT_MODEL', 'meta-llama/Llama-3.1-8B-Instruct')
337
  self.system_prompt = self.load_system_prompt()
338
  self.data_loader = TrainingDataLoader()
339
 
@@ -489,9 +489,9 @@ Minimum purchase is 1 roll (67-70 yards)."""
489
  return self.get_fallback_response(user_message)
490
 
491
  try:
492
- # Use Llama conversation format
493
- if "llama" in self.model.lower():
494
- prompt = f"<|system|>\n{self.system_prompt}\n<|user|>\n{user_message}\n<|assistant|>\n"
495
  else:
496
  # Fallback to GPT-2 format for other models
497
  prompt = f"User: {user_message}\nAssistant:"
@@ -501,17 +501,17 @@ Minimum purchase is 1 roll (67-70 yards)."""
501
 
502
  logger.info(f"Generating response for prompt: {prompt[:100]}...")
503
 
504
- # Generate response with Llama-optimized parameters
505
- if "llama" in self.model.lower():
506
  response = self.client.text_generation(
507
  prompt,
508
- max_new_tokens=200,
509
- temperature=0.7,
510
  top_p=0.9,
511
- top_k=40,
512
  repetition_penalty=1.1,
513
  do_sample=True,
514
- stop_sequences=["<|end|>", "<|user|>", "\n\n"]
515
  )
516
  else:
517
  # GPT-2 parameters for other models
@@ -529,15 +529,15 @@ Minimum purchase is 1 roll (67-70 yards)."""
529
  logger.info(f"Raw AI response: {response[:200]}...")
530
 
531
  # Clean up the response based on model type
532
- if "llama" in self.model.lower():
533
- # Clean up Llama response
534
- if "<|assistant|>" in response:
535
- assistant_response = response.split("<|assistant|>")[-1].strip()
536
  else:
537
  assistant_response = response.strip()
538
 
539
- # Remove any remaining special tokens
540
- assistant_response = assistant_response.replace("<|end|>", "").replace("<|user|>", "").replace("<|system|>", "").strip()
541
  else:
542
  # Clean up GPT-2 response
543
  if "Assistant:" in response:
@@ -545,8 +545,8 @@ Minimum purchase is 1 roll (67-70 yards)."""
545
  else:
546
  assistant_response = response.strip()
547
 
548
- # Remove any remaining special tokens
549
- assistant_response = assistant_response.replace("<|end|>", "").replace("<|user|>", "").strip()
550
 
551
  # Remove any incomplete sentences or cut-off text
552
  if assistant_response.endswith(('.', '!', '?')):
@@ -1036,21 +1036,15 @@ async def get_available_models():
1036
  "success": True,
1037
  "models": [
1038
  {
1039
- "name": "meta-llama/Llama-3.1-8B-Instruct",
1040
- "description": "Llama 3.1 8B Instruct - High performance, best quality",
1041
- "size": "8B parameters",
1042
  "recommended": True
1043
  },
1044
  {
1045
- "name": "meta-llama/Llama-3.2-1B-Instruct",
1046
- "description": "Llama 3.2 1B Instruct - Fast and efficient",
1047
- "size": "1B parameters",
1048
- "recommended": True
1049
- },
1050
- {
1051
- "name": "meta-llama/Llama-3.2-3B-Instruct",
1052
- "description": "Llama 3.2 3B Instruct - Balanced performance",
1053
- "size": "3B parameters",
1054
  "recommended": True
1055
  },
1056
  {
@@ -1064,12 +1058,6 @@ async def get_available_models():
1064
  "description": "DistilGPT-2 - Even smaller and faster",
1065
  "size": "82M parameters",
1066
  "recommended": False
1067
- },
1068
- {
1069
- "name": "microsoft/DialoGPT-small",
1070
- "description": "DialoGPT Small - Conversational AI",
1071
- "size": "117M parameters",
1072
- "recommended": False
1073
  }
1074
  ]
1075
  }
 
332
 
333
  def __init__(self):
334
  self.api_key = os.getenv('HUGGINGFAC_API_KEY_2')
335
+ # Use available model with your API key
336
+ self.model = os.getenv('DEFAULT_MODEL', 'microsoft/DialoGPT-medium')
337
  self.system_prompt = self.load_system_prompt()
338
  self.data_loader = TrainingDataLoader()
339
 
 
489
  return self.get_fallback_response(user_message)
490
 
491
  try:
492
+ # Use DialoGPT conversation format
493
+ if "dialogpt" in self.model.lower():
494
+ prompt = f"User: {user_message}\nAssistant:"
495
  else:
496
  # Fallback to GPT-2 format for other models
497
  prompt = f"User: {user_message}\nAssistant:"
 
501
 
502
  logger.info(f"Generating response for prompt: {prompt[:100]}...")
503
 
504
+ # Generate response with DialoGPT-optimized parameters
505
+ if "dialogpt" in self.model.lower():
506
  response = self.client.text_generation(
507
  prompt,
508
+ max_new_tokens=150,
509
+ temperature=0.8,
510
  top_p=0.9,
511
+ top_k=50,
512
  repetition_penalty=1.1,
513
  do_sample=True,
514
+ stop_sequences=["User:", "Assistant:", "\n\n"]
515
  )
516
  else:
517
  # GPT-2 parameters for other models
 
529
  logger.info(f"Raw AI response: {response[:200]}...")
530
 
531
  # Clean up the response based on model type
532
+ if "dialogpt" in self.model.lower():
533
+ # Clean up DialoGPT response
534
+ if "Assistant:" in response:
535
+ assistant_response = response.split("Assistant:")[-1].strip()
536
  else:
537
  assistant_response = response.strip()
538
 
539
+ # Remove any remaining conversation markers
540
+ assistant_response = assistant_response.replace("User:", "").replace("Assistant:", "").strip()
541
  else:
542
  # Clean up GPT-2 response
543
  if "Assistant:" in response:
 
545
  else:
546
  assistant_response = response.strip()
547
 
548
+ # Remove any remaining conversation markers
549
+ assistant_response = assistant_response.replace("User:", "").replace("Assistant:", "").strip()
550
 
551
  # Remove any incomplete sentences or cut-off text
552
  if assistant_response.endswith(('.', '!', '?')):
 
1036
  "success": True,
1037
  "models": [
1038
  {
1039
+ "name": "microsoft/DialoGPT-medium",
1040
+ "description": "DialoGPT Medium - Best conversational AI (Recommended)",
1041
+ "size": "345M parameters",
1042
  "recommended": True
1043
  },
1044
  {
1045
+ "name": "microsoft/DialoGPT-small",
1046
+ "description": "DialoGPT Small - Fast conversational AI",
1047
+ "size": "117M parameters",
 
 
 
 
 
 
1048
  "recommended": True
1049
  },
1050
  {
 
1058
  "description": "DistilGPT-2 - Even smaller and faster",
1059
  "size": "82M parameters",
1060
  "recommended": False
 
 
 
 
 
 
1061
  }
1062
  ]
1063
  }