import pandas as pd
from datasets import load_dataset
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr
# Step 1: Load a sample of the dataset and save it to CSV (auto-training data).
# Note: the full FineWeb corpus is tens of terabytes, so rather than downloading
# it whole, this sketch streams the "sample-10BT" config and keeps only the
# first `num_rows` rows.
def load_and_save_dataset(num_rows=1000):
    print("Loading dataset sample from Hugging Face...")
    dataset = load_dataset(
        "HuggingFaceFW/fineweb", name="sample-10BT", split="train", streaming=True
    )
    print(f"Saving the first {num_rows} rows to data.csv...")
    rows = list(dataset.take(num_rows))
    pd.DataFrame(rows).to_csv("data.csv", index=False)
    print("Done! Data saved to data.csv.")
    return f"Dataset sample ({num_rows} rows) loaded and saved to data.csv."

# Run on startup
load_and_save_dataset()
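
# Optional sanity check (a sketch, not wired into the app): `preview_csv` is a
# hypothetical helper added here for illustration only, to confirm that
# data.csv was written correctly.
def preview_csv(path="data.csv", n=5):
    """Return the first n rows of the saved CSV as a pandas DataFrame."""
    return pd.read_csv(path, nrows=n)

# Example: print(preview_csv())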
# Step 2: Load GPT-2 model for inference
model_name = "gpt2"  # or "distilgpt2" for faster inference
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Create the text-generation pipeline, using the GPU if one is available.
device = 0 if torch.cuda.is_available() else -1
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
# Function to generate responses
def generate_response(prompt):
    responses = generator(
        prompt,
        max_new_tokens=100,  # cap on generated tokens (prompt length excluded)
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token; this silences the warning
    )
    return responses[0]["generated_text"].strip()
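
# Example usage (hypothetical prompt; sampled output varies from run to run):
#   generate_response("Explain what a language model is")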
# Step 3: Build Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## GPT-2 Based AI Assistant with Dataset Loaded from Hugging Face")
    gr.Textbox(value="Dataset sample loads automatically on startup.", interactive=False, lines=2)
    fetch_button = gr.Button("Load Dataset and Save CSV")
    output_message = gr.Textbox(label="Status")
    # The handler takes no inputs, so `inputs` is omitted.
    fetch_button.click(load_and_save_dataset, outputs=output_message)

    gr.Markdown("### Ask the AI Assistant")
    prompt_input = gr.Textbox(label="Enter your prompt", placeholder="Say something...")
    response_output = gr.Textbox(label="Response", lines=10)
    gr.Button("Ask").click(generate_response, inputs=prompt_input, outputs=response_output)
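
# A hedged note: on Hugging Face Spaces the default launch() below is enough;
# outside Spaces, demo.launch(share=True) would create a temporary public link.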
demo.launch()