Spaces · Commit 4c10d31 · Parent(s): c80b7c6

Configure to use HUGGINGFAC_API_KEY_2 and Meta Llama 3.1 8B as default model

- DEPLOYMENT.md +8 -6
- app.py +52 -24
DEPLOYMENT.md CHANGED

@@ -7,15 +7,17 @@
 Set these environment variables in your Hugging Face Space settings:
 
 ```bash
-# Required: Hugging Face API Key
-…
+# Required: Hugging Face API Key (use your secret variable name)
+HUGGINGFAC_API_KEY_2=your_huggingface_api_key_here
 
-# Optional: Default model (defaults to …
-DEFAULT_MODEL=…
+# Optional: Default model (defaults to Llama 3.1 8B Instruct)
+DEFAULT_MODEL=meta-llama/Llama-3.1-8B-Instruct
 
-# Optional: Alternative …
+# Optional: Alternative models
+# DEFAULT_MODEL=meta-llama/Llama-3.2-1B-Instruct
+# DEFAULT_MODEL=meta-llama/Llama-3.2-3B-Instruct
+# DEFAULT_MODEL=gpt2
 # DEFAULT_MODEL=distilgpt2
-# DEFAULT_MODEL=microsoft/DialoGPT-medium
 ```
 
 ### 2. Files Structure
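For reference, these values can also be set programmatically instead of through the Space settings UI. A minimal sketch using the `huggingface_hub` `HfApi` helpers `add_space_secret` and `add_space_variable`; the Space ID and tokens are placeholders, not values from this commit:

```python
from huggingface_hub import HfApi

api = HfApi(token="hf_...")  # a write token for your account (placeholder)

# Store the API key as a secret and the default model as a plain variable;
# "your-namespace/your-space" is a placeholder for the actual Space ID.
api.add_space_secret("your-namespace/your-space",
                     "HUGGINGFAC_API_KEY_2", "your_huggingface_api_key_here")
api.add_space_variable("your-namespace/your-space",
                       "DEFAULT_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
```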
app.py CHANGED

@@ -330,14 +330,14 @@ class TextilindoAI:
     """Textilindo AI Assistant using HuggingFace Inference API"""
 
     def __init__(self):
-        self.api_key = os.getenv('…
-        # Use …
-        self.model = os.getenv('DEFAULT_MODEL', '…
+        self.api_key = os.getenv('HUGGINGFAC_API_KEY_2')
+        # Use Meta Llama model with your API key
+        self.model = os.getenv('DEFAULT_MODEL', 'meta-llama/Llama-3.1-8B-Instruct')
         self.system_prompt = self.load_system_prompt()
         self.data_loader = TrainingDataLoader()
 
         if not self.api_key:
-            logger.warning("…
+            logger.warning("HUGGINGFAC_API_KEY_2 not found. Using mock responses.")
             self.client = None
         else:
             try:
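The `try:` block that actually constructs `self.client` falls outside this hunk. For context, a minimal sketch of the usual setup, assuming the `huggingface_hub` `InferenceClient` (the class whose `text_generation` method is called in the hunk below):

```python
import os
from huggingface_hub import InferenceClient

# Sketch of the client construction elided from the hunk above:
# the token comes from the Space secret configured in DEPLOYMENT.md.
client = InferenceClient(
    model=os.getenv("DEFAULT_MODEL", "meta-llama/Llama-3.1-8B-Instruct"),
    token=os.getenv("HUGGINGFAC_API_KEY_2"),
)
```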
@@ -399,36 +399,64 @@ Minimum purchase is 1 roll (67-70 yards)."""
             return self.get_fallback_response(user_message)
 
         try:
-            # Use …
-            …
+            # Use Llama conversation format
+            if "llama" in self.model.lower():
+                prompt = f"<|system|>\n{self.system_prompt}\n<|user|>\n{user_message}\n<|assistant|>\n"
+            else:
+                # Fallback to GPT-2 format for other models
+                prompt = f"User: {user_message}\nAssistant:"
 
             logger.info(f"Using model: {self.model}")
             logger.info(f"API Key present: {bool(self.api_key)}")
 
             logger.info(f"Generating response for prompt: {prompt[:100]}...")
 
-            # Generate response with …
-            …
+            # Generate response with Llama-optimized parameters
+            if "llama" in self.model.lower():
+                response = self.client.text_generation(
+                    prompt,
+                    max_new_tokens=200,
+                    temperature=0.7,
+                    top_p=0.9,
+                    top_k=40,
+                    repetition_penalty=1.1,
+                    do_sample=True,
+                    stop_sequences=["<|end|>", "<|user|>", "\n\n"]
+                )
+            else:
+                # GPT-2 parameters for other models
+                response = self.client.text_generation(
+                    prompt,
+                    max_new_tokens=150,
+                    temperature=0.8,
+                    top_p=0.9,
+                    top_k=50,
+                    repetition_penalty=1.2,
+                    do_sample=True,
+                    stop_sequences=["User:", "Assistant:", "\n\n"]
+                )
 
             logger.info(f"Raw AI response: {response[:200]}...")
 
-            # Clean up the response
-            if "…
-            …
+            # Clean up the response based on model type
+            if "llama" in self.model.lower():
+                # Clean up Llama response
+                if "<|assistant|>" in response:
+                    assistant_response = response.split("<|assistant|>")[-1].strip()
+                else:
+                    assistant_response = response.strip()
+
+                # Remove any remaining special tokens
+                assistant_response = assistant_response.replace("<|end|>", "").replace("<|user|>", "").replace("<|system|>", "").strip()
             else:
-                …
+                # Clean up GPT-2 response
+                if "Assistant:" in response:
+                    assistant_response = response.split("Assistant:")[-1].strip()
+                else:
+                    assistant_response = response.strip()
+
+                # Remove any remaining special tokens
+                assistant_response = assistant_response.replace("<|end|>", "").replace("<|user|>", "").strip()
 
             # Remove any incomplete sentences or cut-off text
             if assistant_response.endswith(('.', '!', '?')):