import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import gradio as gr
# ====================== 1. Load Tokenizer & Add Special Tokens ======================
base_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
adapter_path = "BeDream/tuning-lora-tinyllama-1.1b"  # LoRA folder in the Space
tokenizer = AutoTokenizer.from_pretrained(adapter_path)
# Make sure the EOS and PAD tokens exist and are distinct
if tokenizer.eos_token is None:
    tokenizer.eos_token = "<|endoftext|>"
    tokenizer.add_special_tokens({'eos_token': tokenizer.eos_token})
if tokenizer.pad_token is None or tokenizer.pad_token == tokenizer.eos_token:
    print("⚠️ pad_token is the same as eos_token. Adding a new pad_token...")
    tokenizer.pad_token = '[PAD]'
    tokenizer.add_special_tokens({'pad_token': tokenizer.pad_token})
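# Sanity check (a minimal sketch, not in the original): generation relies on
# PAD and EOS mapping to different ids, otherwise padding doubles as a stop signal.
assert tokenizer.pad_token_id is not None and tokenizer.pad_token_id != tokenizer.eos_token_id, \
    "pad_token and eos_token must map to different ids"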
# ====================== 2. Set Chat Template ======================
tokenizer.chat_template = """
{% for message in messages %}
{% if message['role'] == 'user' %}
{{ '<|user|>\n' + message['content'] + eos_token }}
{% elif message['role'] == 'system' %}
{{ '<|system|>\n' + message['content'] + eos_token }}
{% elif message['role'] == 'assistant' %}
{{ '<|assistant|>\n' + message['content'] + eos_token }}
{% endif %}
{% if loop.last and add_generation_prompt %}
{{ '<|assistant|>' }}
{% endif %}
{% endfor %}
"""
# ====================== 3. Load Model & Resize Token Embeddings ======================
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto"
)
model.resize_token_embeddings(len(tokenizer))  # important: do this before loading the PEFT adapter
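# Optional consistency check (a sketch, not in the original): after resizing,
# the input embedding matrix should have at least one row per tokenizer entry.
assert model.get_input_embeddings().weight.shape[0] >= len(tokenizer)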
# ====================== 4. Load PEFT Adapter ======================
model = PeftModel.from_pretrained(model, adapter_path)
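# Optional speed-up (an assumption, not part of the original Space): merging the
# LoRA weights into the base model removes the adapter indirection at inference.
# model = model.merge_and_unload()
model.eval()  # inference mode: disables dropout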
# ====================== 5. Inference Function ======================
def chat_fn(message, history):
    # Rebuild the conversation for the chat template. Depending on the Gradio
    # version, history arrives as (user, assistant) pairs or as role dicts.
    messages = []
    for turn in history:
        if isinstance(turn, dict):  # "messages" format
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:  # (user, assistant) pair format
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg is not None:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the latest answer: the text after the final <|assistant|> tag
    if "<|assistant|>" in response:
        response = response.split("<|assistant|>")[-1].strip()
    return response
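# Quick local smoke test (hypothetical usage; the Space itself only serves the
# Gradio UI below):
# print(chat_fn("Hello, who are you?", []))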
# ====================== 6. Launch Gradio Interface ======================
iface = gr.ChatInterface(fn=chat_fn)
iface.launch()