harismlnaslm commited on
Commit
4c10d31
·
1 Parent(s): c80b7c6

Configure to use HUGGINGFAC_API_KEY_2 and Meta Llama 3.1 8B as default model

Browse files
Files changed (2) hide show
  1. DEPLOYMENT.md +8 -6
  2. app.py +52 -24
DEPLOYMENT.md CHANGED
@@ -7,15 +7,17 @@
7
  Set these environment variables in your Hugging Face Space settings:
8
 
9
  ```bash
10
- # Required: Hugging Face API Key
11
- HUGGINGFACE_API_KEY=your_huggingface_api_key_here
12
 
13
- # Optional: Default model (defaults to gpt2)
14
- DEFAULT_MODEL=gpt2
15
 
16
- # Optional: Alternative lightweight models
 
 
 
17
  # DEFAULT_MODEL=distilgpt2
18
- # DEFAULT_MODEL=microsoft/DialoGPT-medium
19
  ```
20
 
21
  ### 2. Files Structure
 
7
  Set these environment variables in your Hugging Face Space settings:
8
 
9
  ```bash
10
+ # Required: Hugging Face API Key. The variable name below must match your
+ # Space secret EXACTLY — including the unusual spelling "HUGGINGFAC" (no
+ # final "E") — because app.py reads this exact name via os.getenv().
+ HUGGINGFAC_API_KEY_2=your_huggingface_api_key_here
12
 
13
+ # Optional: Default model (defaults to Llama 3.1 8B Instruct)
14
+ DEFAULT_MODEL=meta-llama/Llama-3.1-8B-Instruct
15
 
16
+ # Optional: Alternative models
17
+ # DEFAULT_MODEL=meta-llama/Llama-3.2-1B-Instruct
18
+ # DEFAULT_MODEL=meta-llama/Llama-3.2-3B-Instruct
19
+ # DEFAULT_MODEL=gpt2
20
  # DEFAULT_MODEL=distilgpt2
 
21
  ```
22
 
23
  ### 2. Files Structure
app.py CHANGED
@@ -330,14 +330,14 @@ class TextilindoAI:
330
  """Textilindo AI Assistant using HuggingFace Inference API"""
331
 
332
  def __init__(self):
333
- self.api_key = os.getenv('HUGGINGFACE_API_KEY')
334
- # Use a model available on free HuggingFace Inference API
335
- self.model = os.getenv('DEFAULT_MODEL', 'gpt2') # Use GPT-2 which is available
336
  self.system_prompt = self.load_system_prompt()
337
  self.data_loader = TrainingDataLoader()
338
 
339
  if not self.api_key:
340
- logger.warning("HUGGINGFACE_API_KEY not found. Using mock responses.")
341
  self.client = None
342
  else:
343
  try:
@@ -399,36 +399,64 @@ Minimum purchase is 1 roll (67-70 yards)."""
399
  return self.get_fallback_response(user_message)
400
 
401
  try:
402
- # Use GPT-2 conversation format
403
- prompt = f"User: {user_message}\nAssistant:"
 
 
 
 
404
 
405
  logger.info(f"Using model: {self.model}")
406
  logger.info(f"API Key present: {bool(self.api_key)}")
407
 
408
  logger.info(f"Generating response for prompt: {prompt[:100]}...")
409
 
410
- # Generate response with GPT-2 parameters
411
- response = self.client.text_generation(
412
- prompt,
413
- max_new_tokens=150,
414
- temperature=0.8,
415
- top_p=0.9,
416
- top_k=50,
417
- repetition_penalty=1.2,
418
- do_sample=True,
419
- stop_sequences=["User:", "Assistant:", "\n\n"]
420
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
421
 
422
  logger.info(f"Raw AI response: {response[:200]}...")
423
 
424
- # Clean up the response for GPT-2
425
- if "Assistant:" in response:
426
- assistant_response = response.split("Assistant:")[-1].strip()
 
 
 
 
 
 
 
427
  else:
428
- assistant_response = response.strip()
429
-
430
- # Remove any remaining special tokens and clean up
431
- assistant_response = assistant_response.replace("<|end|>", "").replace("<|user|>", "").strip()
 
 
 
 
432
 
433
  # Remove any incomplete sentences or cut-off text
434
  if assistant_response.endswith(('.', '!', '?')):
 
330
  """Textilindo AI Assistant using HuggingFace Inference API"""
331
 
332
  def __init__(self):
333
+ self.api_key = os.getenv('HUGGINGFAC_API_KEY_2')
334
+ # Use Meta Llama model with your API key
335
+ self.model = os.getenv('DEFAULT_MODEL', 'meta-llama/Llama-3.1-8B-Instruct')
336
  self.system_prompt = self.load_system_prompt()
337
  self.data_loader = TrainingDataLoader()
338
 
339
  if not self.api_key:
340
+ logger.warning("HUGGINGFAC_API_KEY_2 not found. Using mock responses.")
341
  self.client = None
342
  else:
343
  try:
 
399
  return self.get_fallback_response(user_message)
400
 
401
  try:
402
+ # Use Llama conversation format
403
+ if "llama" in self.model.lower():
404
+ prompt = f"<|system|>\n{self.system_prompt}\n<|user|>\n{user_message}\n<|assistant|>\n"
405
+ else:
406
+ # Fallback to GPT-2 format for other models
407
+ prompt = f"User: {user_message}\nAssistant:"
408
 
409
  logger.info(f"Using model: {self.model}")
410
  logger.info(f"API Key present: {bool(self.api_key)}")
411
 
412
  logger.info(f"Generating response for prompt: {prompt[:100]}...")
413
 
414
+ # Generate response with Llama-optimized parameters
415
+ if "llama" in self.model.lower():
416
+ response = self.client.text_generation(
417
+ prompt,
418
+ max_new_tokens=200,
419
+ temperature=0.7,
420
+ top_p=0.9,
421
+ top_k=40,
422
+ repetition_penalty=1.1,
423
+ do_sample=True,
424
+ stop_sequences=["<|end|>", "<|user|>", "\n\n"]
425
+ )
426
+ else:
427
+ # GPT-2 parameters for other models
428
+ response = self.client.text_generation(
429
+ prompt,
430
+ max_new_tokens=150,
431
+ temperature=0.8,
432
+ top_p=0.9,
433
+ top_k=50,
434
+ repetition_penalty=1.2,
435
+ do_sample=True,
436
+ stop_sequences=["User:", "Assistant:", "\n\n"]
437
+ )
438
 
439
  logger.info(f"Raw AI response: {response[:200]}...")
440
 
441
+ # Clean up the response based on model type
442
+ if "llama" in self.model.lower():
443
+ # Clean up Llama response
444
+ if "<|assistant|>" in response:
445
+ assistant_response = response.split("<|assistant|>")[-1].strip()
446
+ else:
447
+ assistant_response = response.strip()
448
+
449
+ # Remove any remaining special tokens
450
+ assistant_response = assistant_response.replace("<|end|>", "").replace("<|user|>", "").replace("<|system|>", "").strip()
451
  else:
452
+ # Clean up GPT-2 response
453
+ if "Assistant:" in response:
454
+ assistant_response = response.split("Assistant:")[-1].strip()
455
+ else:
456
+ assistant_response = response.strip()
457
+
458
+ # Remove any remaining special tokens
459
+ assistant_response = assistant_response.replace("<|end|>", "").replace("<|user|>", "").strip()
460
 
461
  # Remove any incomplete sentences or cut-off text
462
  if assistant_response.endswith(('.', '!', '?')):