harismlnaslm commited on
Commit
4c10d31
·
1 Parent(s): c80b7c6

Configure to use HUGGINGFAC_API_KEY_2 and Meta Llama 3.1 8B as default model

Browse files
Files changed (2) hide show
  1. DEPLOYMENT.md +8 -6
  2. app.py +52 -24
DEPLOYMENT.md CHANGED
@@ -7,15 +7,17 @@
7
  Set these environment variables in your Hugging Face Space settings:
8
 
9
  ```bash
10
- # Required: Hugging Face API Key
11
- HUGGINGFACE_API_KEY=your_huggingface_api_key_here
12
 
13
- # Optional: Default model (defaults to gpt2)
14
- DEFAULT_MODEL=gpt2
15
 
16
- # Optional: Alternative lightweight models
 
 
 
17
  # DEFAULT_MODEL=distilgpt2
18
- # DEFAULT_MODEL=microsoft/DialoGPT-medium
19
  ```
20
 
21
  ### 2. Files Structure
 
7
  Set these environment variables in your Hugging Face Space settings:
8
 
9
  ```bash
10
+ # Required: Hugging Face API Key. The variable name below must match your
+ # Space secret EXACTLY — including the unusual spelling "HUGGINGFAC" (no
+ # final "E") — because app.py reads this exact name via os.getenv().
+ HUGGINGFAC_API_KEY_2=your_huggingface_api_key_here
12
 
13
+ # Optional: Default model (defaults to Llama 3.1 8B Instruct)
14
+ DEFAULT_MODEL=meta-llama/Llama-3.1-8B-Instruct
15
 
16
+ # Optional: Alternative models
17
+ # DEFAULT_MODEL=meta-llama/Llama-3.2-1B-Instruct
18
+ # DEFAULT_MODEL=meta-llama/Llama-3.2-3B-Instruct
19
+ # DEFAULT_MODEL=gpt2
20
  # DEFAULT_MODEL=distilgpt2
 
21
  ```
22
 
23
  ### 2. Files Structure
app.py CHANGED
@@ -330,14 +330,14 @@ class TextilindoAI:
330
  """Textilindo AI Assistant using HuggingFace Inference API"""
331
 
332
  def __init__(self):
333
- self.api_key = os.getenv('HUGGINGFACE_API_KEY')
334
- # Use a model available on free HuggingFace Inference API
335
- self.model = os.getenv('DEFAULT_MODEL', 'gpt2') # Use GPT-2 which is available
336
  self.system_prompt = self.load_system_prompt()
337
  self.data_loader = TrainingDataLoader()
338
 
339
  if not self.api_key:
340
- logger.warning("HUGGINGFACE_API_KEY not found. Using mock responses.")
341
  self.client = None
342
  else:
343
  try:
@@ -399,36 +399,64 @@ Minimum purchase is 1 roll (67-70 yards)."""
399
  return self.get_fallback_response(user_message)
400
 
401
  try:
402
- # Use GPT-2 conversation format
403
- prompt = f"User: {user_message}\nAssistant:"
 
 
 
 
404
 
405
  logger.info(f"Using model: {self.model}")
406
  logger.info(f"API Key present: {bool(self.api_key)}")
407
 
408
  logger.info(f"Generating response for prompt: {prompt[:100]}...")
409
 
410
- # Generate response with GPT-2 parameters
411
- response = self.client.text_generation(
412
- prompt,
413
- max_new_tokens=150,
414
- temperature=0.8,
415
- top_p=0.9,
416
- top_k=50,
417
- repetition_penalty=1.2,
418
- do_sample=True,
419
- stop_sequences=["User:", "Assistant:", "\n\n"]
420
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
421
 
422
  logger.info(f"Raw AI response: {response[:200]}...")
423
 
424
- # Clean up the response for GPT-2
425
- if "Assistant:" in response:
426
- assistant_response = response.split("Assistant:")[-1].strip()
 
 
 
 
 
 
 
427
  else:
428
- assistant_response = response.strip()
429
-
430
- # Remove any remaining special tokens and clean up
431
- assistant_response = assistant_response.replace("<|end|>", "").replace("<|user|>", "").strip()
 
 
 
 
432
 
433
  # Remove any incomplete sentences or cut-off text
434
  if assistant_response.endswith(('.', '!', '?')):
 
330
  """Textilindo AI Assistant using HuggingFace Inference API"""
331
 
332
  def __init__(self):
333
+ self.api_key = os.getenv('HUGGINGFAC_API_KEY_2')
334
+ # Use Meta Llama model with your API key
335
+ self.model = os.getenv('DEFAULT_MODEL', 'meta-llama/Llama-3.1-8B-Instruct')
336
  self.system_prompt = self.load_system_prompt()
337
  self.data_loader = TrainingDataLoader()
338
 
339
  if not self.api_key:
340
+ logger.warning("HUGGINGFAC_API_KEY_2 not found. Using mock responses.")
341
  self.client = None
342
  else:
343
  try:
 
399
  return self.get_fallback_response(user_message)
400
 
401
  try:
402
+ # Use Llama conversation format
403
+ if "llama" in self.model.lower():
404
+ prompt = f"<|system|>\n{self.system_prompt}\n<|user|>\n{user_message}\n<|assistant|>\n"
405
+ else:
406
+ # Fallback to GPT-2 format for other models
407
+ prompt = f"User: {user_message}\nAssistant:"
408
 
409
  logger.info(f"Using model: {self.model}")
410
  logger.info(f"API Key present: {bool(self.api_key)}")
411
 
412
  logger.info(f"Generating response for prompt: {prompt[:100]}...")
413
 
414
+ # Generate response with Llama-optimized parameters
415
+ if "llama" in self.model.lower():
416
+ response = self.client.text_generation(
417
+ prompt,
418
+ max_new_tokens=200,
419
+ temperature=0.7,
420
+ top_p=0.9,
421
+ top_k=40,
422
+ repetition_penalty=1.1,
423
+ do_sample=True,
424
+ stop_sequences=["<|end|>", "<|user|>", "\n\n"]
425
+ )
426
+ else:
427
+ # GPT-2 parameters for other models
428
+ response = self.client.text_generation(
429
+ prompt,
430
+ max_new_tokens=150,
431
+ temperature=0.8,
432
+ top_p=0.9,
433
+ top_k=50,
434
+ repetition_penalty=1.2,
435
+ do_sample=True,
436
+ stop_sequences=["User:", "Assistant:", "\n\n"]
437
+ )
438
 
439
  logger.info(f"Raw AI response: {response[:200]}...")
440
 
441
+ # Clean up the response based on model type
442
+ if "llama" in self.model.lower():
443
+ # Clean up Llama response
444
+ if "<|assistant|>" in response:
445
+ assistant_response = response.split("<|assistant|>")[-1].strip()
446
+ else:
447
+ assistant_response = response.strip()
448
+
449
+ # Remove any remaining special tokens
450
+ assistant_response = assistant_response.replace("<|end|>", "").replace("<|user|>", "").replace("<|system|>", "").strip()
451
  else:
452
+ # Clean up GPT-2 response
453
+ if "Assistant:" in response:
454
+ assistant_response = response.split("Assistant:")[-1].strip()
455
+ else:
456
+ assistant_response = response.strip()
457
+
458
+ # Remove any remaining special tokens
459
+ assistant_response = assistant_response.replace("<|end|>", "").replace("<|user|>", "").strip()
460
 
461
  # Remove any incomplete sentences or cut-off text
462
  if assistant_response.endswith(('.', '!', '?')):