Spaces:

anmoldograpsl
/

test_space

Sleeping

App Files Files Community

anmoldograpsl committed on Oct 14, 2024

Commit

10271df

verified ·

1 Parent(s): ca0ce8e

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -40

app.py CHANGED Viewed

@@ -1,42 +1,60 @@
-from huggingface_hub import login
 import os
-from peft import PeftModel, PeftConfig
-from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
-from PIL import Image
-import requests
-import torch
-import io
 import base64
-import cv2
-access_token = os.environ["HF_TOKEN"]
-login(token=access_token)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-dtype = torch.bfloat16
-config = PeftConfig.from_pretrained("anushettypsl/paligemma_vqav2")
-# base_model = AutoModelForCausalLM.from_pretrained("google/paligemma-3b-pt-448")
-base_model = PaliGemmaForConditionalGeneration.from_pretrained("google/paligemma-3b-pt-448")
-model = PeftModel.from_pretrained(base_model, "anushettypsl/paligemma_vqav2", device_map=device)
-processor = AutoProcessor.from_pretrained("google/paligemma-3b-pt-448", device_map=device)
-model.to(device)
-image = cv2.imread('/content/15_BC_G2_6358_40x_2_jpg.rf.97595fa4965f66ad45be8fd055331933.jpg')
-# Convert the image to base64 encoding
-image_bytes = cv2.imencode('.jpg', image)[1]
-base64_string = base64.b64encode(image_bytes).decode('utf-8')
-input_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
-model_inputs = processor(
-    text=input_text, images=input_image, return_tensors="pt").to(device)
-input_len = model_inputs["input_ids"].shape[-1]
-model.to(device)
-with torch.inference_mode():
-    generation = model.generate(
-        **model_inputs, max_new_tokens=100, do_sample=False)
-    generation = generation[0][input_len:]
-    decoded = processor.decode(generation, skip_special_tokens=True)
-    print(decoded)

 import os
 import base64
+import torch
+import gradio as gr
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForCausalLM
+from peft import get_peft_model, LoraConfig, TaskType
+from huggingface_hub import login
+# Step 1: Log in to Hugging Face
+hf_token = os.getenv("HF_TOKEN")
+login(token=hf_token)
+# Step 2: Load the private model and processor
+model_name = "anushettypsl/paligemma_vqav2"  # Replace with the actual model link
+processor = AutoProcessor.from_pretrained(model_name)
+base_model = AutoModelForCausalLM.from_pretrained(model_name)
+# Step 3: Set up PEFT configuration (if needed)
+lora_config = LoraConfig(
+    r=16,  # Rank
+    lora_alpha=32,  # Scaling factor
+    lora_dropout=0.1,  # Dropout
+    task_type=TaskType.CAUSAL_LM,  # Adjust according to your model's task
+)
+# Step 4: Get the PEFT model
+peft_model = get_peft_model(base_model, lora_config)
+# Step 5: Define the prediction function
+def predict(image_base64, prompt):
+    # Decode the base64 image
+    image_data = base64.b64decode(image_base64)
+    image = Image.open(io.BytesIO(image_data))
+    # Process the image
+    inputs = processor( text=prompt,images=image, return_tensors="pt")
+    # Generate output using the model
+    with torch.no_grad():
+        output = peft_model.generate(**inputs)
+    # Decode the output to text
+    generated_text = processor.decode(output[0], skip_special_tokens=True)
+    return generated_text
+# Step 6: Create the Gradio interface
+interface = gr.Interface(
+    fn=predict,
+    inputs=[
+        gr.Textbox(label="Image (Base64)", placeholder="Enter base64 encoded image here...", lines=10),  # Base64 input for image
+        gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")  # Prompt input
+    ],
+    outputs="text",  # Text output
+    title="Image and Prompt to Text Model",
+    description="Enter a base64 encoded image and a prompt to generate a descriptive text."
+)
+# Step 7: Launch the Gradio app
+interface.launch()