mursalinir committed on
Commit
ee49de0
·
1 Parent(s): e674a8d

add first app.py

Browse files
Files changed (2) hide show
  1. app.py +35 -0
  2. requirements.txt +16 -0
app.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Generate a detailed paragraph-level caption for a local image with InstructBLIP."""
from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
from PIL import Image
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# fp16 halves memory on GPU (model fits in 12GB); fall back to fp32 on CPU,
# where half precision is unsupported or very slow for many ops.
dtype = torch.float16 if device == "cuda" else torch.float32

# Load model (fits in 12GB with fp16)
model_name = "Salesforce/instructblip-flan-t5-xl"
processor = InstructBlipProcessor.from_pretrained(model_name)
model = InstructBlipForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=dtype,
    device_map="auto"
)

# Load your image
image = Image.open("example.jpg").convert("RGB")

# Prompt to force paragraph-level description
prompt = (
    "Describe this image in a detailed paragraph of 5-7 sentences. "
    "Mention setting, objects, colors, actions, background details, and possible context."
)

# Cast floating-point inputs (pixel values) to the model's dtype as well as the
# device: fp32 pixel values fed into an fp16 model raise a dtype-mismatch error.
# BatchFeature.to() only casts floating tensors, so input_ids stay integer.
inputs = processor(images=image, text=prompt, return_tensors="pt").to(device, dtype)

out = model.generate(
    **inputs,
    do_sample=True,        # required: temperature/top_p are ignored in greedy decoding
    max_new_tokens=250,    # enough for multi-sentence
    temperature=0.7,
    top_p=0.9
)

print(processor.batch_decode(out, skip_special_tokens=True)[0])
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face ecosystem
2
+ transformers==4.41.2
3
+ accelerate==0.33.0
4
+ safetensors==0.4.3
5
+ sentencepiece==0.2.0
6
+
7
+ # Parameter-efficient fine-tuning (LoRA)
8
+ peft==0.11.1
9
+ bitsandbytes==0.43.1
10
+
11
+ # Utilities
12
+ pillow==10.3.0
13
+ numpy==1.26.4
14
+
15
+ # Demo UI
16
+ gradio==4.37.2