# Legend-King / app.py
# Uploaded by AiCoderv2 — commit 52779c2 ("Create app.py", verified), 2.04 kB.
import os
from datasets import load_dataset
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr
# Step 1: Load dataset and save to CSV (auto-training data)
def load_and_save_dataset(max_rows=1000):
    """Fetch a sample of the FineWeb dataset and save it to data.csv.

    Streams the dataset rather than downloading it whole: an eager
    ``load_dataset("HuggingFaceFW/fineweb", split="train")`` materializes
    the full split on disk, which is far too large for this app and was
    guaranteed to exhaust storage before the UI ever started.

    Args:
        max_rows: number of rows to pull from the stream before writing
            the CSV (default 1000, keeping startup fast and bounded).

    Returns:
        A human-readable status message for display in the UI.
    """
    print("Loading dataset from Hugging Face (streaming)...")
    stream = load_dataset("HuggingFaceFW/fineweb", split="train", streaming=True)
    # zip with range() bounds the stream without importing itertools.
    rows = [row for _, row in zip(range(max_rows), stream)]
    print("Saving dataset to data.csv...")
    pd.DataFrame(rows).to_csv("data.csv", index=False)
    print("Done! Data saved to data.csv.")
    return "Dataset loaded and saved to data.csv."
# Run on startup
# NOTE(review): this triggers the full dataset download at import time,
# before the UI is built, and duplicates the "Load Dataset and Save CSV"
# button defined below — consider deferring this to the button click only.
load_and_save_dataset()
# Step 2: Load GPT-2 model for inference
model_name = "gpt2" # or "distilgpt2" for faster inference
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Create generator pipeline
# device=-1 selects CPU inference (pass a GPU index, e.g. 0, to use CUDA).
generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=-1)
# Function to generate responses
def generate_response(prompt):
    """Generate one text completion for *prompt* using the GPT-2 pipeline.

    Args:
        prompt: user-supplied text to continue. Empty or whitespace-only
            input short-circuits to "" rather than being fed to the model.

    Returns:
        The generated text as the pipeline emits it (prompt included),
        stripped of surrounding whitespace.
    """
    # Guard clause: nothing sensible to continue from an empty prompt.
    if not prompt or not prompt.strip():
        return ""
    responses = generator(
        prompt,
        # max_new_tokens bounds only the completion. The original
        # max_length=100 counted the prompt's tokens too, so prompts
        # near or over 100 tokens yielded truncated or empty output.
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        num_return_sequences=1,
    )
    return responses[0]['generated_text'].strip()
# Step 3: Build Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## GPT-2 Based AI Assistant with Dataset Loaded from Hugging Face")
    # Static status line shown above the dataset controls.
    gr.Textbox(value="Loading dataset...", interactive=False, lines=2)
    fetch_button = gr.Button("Load Dataset and Save CSV")
    output_message = gr.Textbox()
    # Wire the loader directly: the original routed through a one-line
    # pass-through wrapper (fetch_and_confirm) that added nothing.
    fetch_button.click(load_and_save_dataset, outputs=output_message)
    gr.Markdown("### Ask the AI Assistant")
    prompt_input = gr.Textbox(label="Enter your prompt", placeholder="Say something...")
    response_output = gr.Textbox(label="Response", lines=10)
    # Likewise, connect the generator directly instead of via a wrapper.
    gr.Button("Ask").click(generate_response, inputs=prompt_input, outputs=response_output)
demo.launch()