Update app.py
app.py
@@ -18,7 +18,6 @@ import torchvision
 from torchvision.transforms.functional import to_pil_image
 from huggingface_hub import hf_hub_download
 
-import spaces
 import gradio as gr
 
 from transformers import SamModel, SamProcessor
@@ -26,6 +25,14 @@ from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
 from sam2 import VQ_SAM2, VQ_SAM2Config, SAM2Config
 from visualizer import sample_color, draw_mask
 
+# Set the device to use GPU and ensure CUDA is available
+DEVICE = "cuda"  # dedicated GPU runtime
+if not torch.cuda.is_available():
+    raise RuntimeError(
+        "CUDA is not available. Run the container with GPU access (e.g. --gpus all) "
+        "and ensure NVIDIA drivers + container runtime are installed."
+    )
+
 class DirectResize:
     def __init__(self, target_length: int) -> None:
         self.target_length = target_length
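
Viewed on its own, the guard added in this hunk trades a lazy failure (the first CUDA call erroring mid-request) for a loud failure at import time. A minimal standalone sketch of the same fail-fast pattern; the APP_DEVICE environment override is an illustrative assumption, not something this app defines:

```python
# Fail-fast device guard, mirroring the hunk above.
# Assumption: the APP_DEVICE override is hypothetical; the app hard-codes "cuda".
import os
import torch

DEVICE = os.environ.get("APP_DEVICE", "cuda")
if DEVICE.startswith("cuda") and not torch.cuda.is_available():
    # Dying at import time beats a CUDA error in the middle of a user request.
    raise RuntimeError("CUDA is not available; start the container with GPU access.")

print(torch.zeros(2, device=DEVICE).device)  # everything downstream uses DEVICE
```
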
@@ -110,9 +117,7 @@ def load_vq_sam2():
 
     if vq_sam2 is not None:
         return vq_sam2
-
-    if hasattr(torch, "set_default_device"):
-        torch.set_default_device("cpu")
+
 
     sam2_config = SAM2Config(
         ckpt_path=sam2_ckpt_local,
@@ -129,7 +134,7 @@ def load_vq_sam2():
     state = torch.load(mask_tokenizer_local, map_location="cpu")
     vq_sam2.load_state_dict(state)
 
-    vq_sam2 = vq_sam2.
+    vq_sam2 = vq_sam2.to(DEVICE).eval()
     return vq_sam2
 
 processor = AutoProcessor.from_pretrained(MODEL)
@@ -139,17 +144,18 @@ _qwen = None
 _sam = None
 
 def get_qwen():
-    """Must be called only inside @spaces.GPU function."""
     global _qwen
     if _qwen is None:
-        _qwen = Qwen3VLForConditionalGeneration.from_pretrained(
+        _qwen = Qwen3VLForConditionalGeneration.from_pretrained(
+            MODEL,
+            torch_dtype="auto",
+        ).to(DEVICE).eval()
     return _qwen
 
 def get_sam():
-    """Must be called only inside @spaces.GPU function."""
     global _sam
     if _sam is None:
-        _sam = SamModel.from_pretrained("facebook/sam-vit-huge").to(
+        _sam = SamModel.from_pretrained("facebook/sam-vit-huge").to(DEVICE).eval()
    return _sam
 
 colors = sample_color()
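
Both loaders keep the lazy-singleton shape: the first call pays the checkpoint load, every later call returns the cached module. A runnable sketch of the pattern, with an `nn.Linear` standing in for the Qwen3-VL and SAM checkpoints:

```python
# Lazy-singleton loader, the shape get_qwen()/get_sam() share.
# Assumption: nn.Linear stands in for the real from_pretrained(...) checkpoints.
import torch

DEVICE = "cpu"  # the app uses "cuda"; "cpu" keeps the sketch runnable anywhere
_model = None   # module-level cache, like _qwen and _sam

def get_model() -> torch.nn.Module:
    global _model
    if _model is None:  # first call loads; later calls reuse
        _model = torch.nn.Linear(4, 4).to(DEVICE).eval()
    return _model

assert get_model() is get_model()  # one shared instance per process
```
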
@@ -185,7 +191,6 @@ def new_mu_state():
         "next_region_id": 1,
     }
 
-@spaces.GPU
 def mu_on_upload_image(media_path, mu_state):
     if not media_path:
         return new_mu_state(), None, None
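
This hunk is the first of six identical deletions (they repeat down through infer_seg): every event handler loses its @spaces.GPU decorator. On ZeroGPU Spaces that decorator attaches a GPU to the process only for the duration of the decorated call; on a dedicated GPU runtime it is unnecessary, and the import fails entirely outside Spaces. A hedged sketch of the two variants; the `spaces` package only resolves on Hugging Face Spaces:

```python
# Why "-@spaces.GPU" repeats below: ZeroGPU vs. dedicated-GPU handlers.
# Assumption: sketch only; `spaces` is installed on Hugging Face Spaces, not locally.
import spaces

@spaces.GPU  # ZeroGPU: a GPU is attached for the duration of this call
def handler_zerogpu(media_path):
    ...

def handler_dedicated(media_path):  # dedicated runtime: CUDA is always present
    ...
```
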
@@ -195,9 +200,9 @@ def mu_on_upload_image(media_path, mu_state):
     img = Image.open(media_path).convert("RGB")
     w, h = img.size
 
-    inputs = sam_processor(img, return_tensors="pt").to(
+    inputs = sam_processor(img, return_tensors="pt").to(DEVICE)
     with torch.no_grad():
-        emb = sam_model.get_image_embeddings(inputs["pixel_values"]) #
+        emb = sam_model.get_image_embeddings(inputs["pixel_values"])  # tensor on DEVICE
 
     st = new_mu_state()
     st["image_path"] = media_path
@@ -227,10 +232,10 @@ def mu_predict_mask_from_state(mu_state):
         input_points=[mu_state["points"]],
         input_labels=[mu_state["labels"]],
         return_tensors="pt",
-    ).to(
+    ).to(DEVICE)
 
     # restore embedding to CUDA tensor, shape (1,256,64,64)
-    emb = torch.from_numpy(mu_state["image_embeddings"]).to(
+    emb = torch.from_numpy(mu_state["image_embeddings"]).to(DEVICE)
     emb = emb.unsqueeze(0)
 
     with torch.no_grad():
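
Together with the upload handler above, this hunk closes a round-trip: the SAM image embedding is computed once per upload, parked in the Gradio session state as a NumPy array, and promoted back to a DEVICE tensor on every click. A sketch with random stand-in data (per the comment in the hunk, the restored embedding is 1 x 256 x 64 x 64):

```python
# GPU tensor -> NumPy in session state -> GPU tensor on reuse (stand-in data).
import torch

DEVICE = "cpu"  # the app uses "cuda"; "cpu" keeps the sketch runnable anywhere
emb_gpu = torch.randn(256, 64, 64, device=DEVICE)    # stand-in SAM embedding
state = {"image_embeddings": emb_gpu.cpu().numpy()}  # stored once per image

# later, on each click:
emb = torch.from_numpy(state["image_embeddings"]).to(DEVICE)
emb = emb.unsqueeze(0)  # (1, 256, 64, 64), matching the comment in the hunk
assert emb.shape == (1, 256, 64, 64)
```
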
@@ -253,7 +258,6 @@ def mu_predict_mask_from_state(mu_state):
     mask = (mask > 0).astype(np.float32)
     return mask
 
-@spaces.GPU
 def mu_add_point(evt: gr.SelectData, mu_state, is_positive: bool):
     if mu_state["image_path"] is None:
         return mu_state, None
@@ -266,7 +270,6 @@ def mu_add_point(evt: gr.SelectData, mu_state, is_positive: bool):
     mu_state["cur_mask"] = mask
     return mu_state, mask
 
-@spaces.GPU
 def mu_add_point_xy(xy, mu_state, is_positive: bool):
     if mu_state["image_path"] is None:
         return mu_state, None
@@ -293,7 +296,6 @@ def mu_clear_prompts(mu_state):
     mu_state["cur_mask"] = None
     return mu_state, None
 
-@spaces.GPU
 def mu_save_region(mu_state):
     if mu_state["cur_mask"] is None:
         return mu_state, gr.update(choices=[], value=None)
@@ -439,7 +441,6 @@ def replace_region_all(text: str, rid: str, token_str: str) -> str:
 def short_tag_from_codes(code_a: int, code_b: int) -> str:
     return f"<{code_a:04d}-{code_b:04d}>"
 
-@spaces.GPU
 def infer_understanding(mu_media, mu_query, mu_state):
     model = get_qwen()
 
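
For reference, the context lines above show the tag format the region tokens use: both codebook indices are zero-padded to four digits.

```python
def short_tag_from_codes(code_a: int, code_b: int) -> str:
    return f"<{code_a:04d}-{code_b:04d}>"

print(short_tag_from_codes(7, 123))    # <0007-0123>
print(short_tag_from_codes(42, 4095))  # <0042-4095>
```
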
@@ -500,7 +501,6 @@ def infer_understanding(mu_media, mu_query, mu_state):
         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )[0]
 
-@spaces.GPU
 def infer_seg(media, query):
     model = get_qwen()
     vq_sam2 = load_vq_sam2()
@@ -642,7 +642,7 @@ def build_demo():
     gr.HTML(HEADER)
 
     with gr.Tab('Mask Generation'):
-        download_btn_1 = gr.DownloadButton(label='
+        download_btn_1 = gr.DownloadButton(label='📦 Download', interactive=False, render=False)
         msk_1 = gr.AnnotatedImage(label='De-tokenized 2D masks', color_map=color_map, render=False)
         ans_1 = gr.HighlightedText(
             label='Model Response', color_map=color_map_light, show_inline_category=False, render=False)
@@ -661,14 +661,14 @@ def build_demo():
         )
 
         with gr.Row():
-            random_btn_1 = gr.Button(value='
+            random_btn_1 = gr.Button(value='🔮 Random', visible=False)
 
-            reset_btn_1 = gr.ClearButton([media_1, query_1, msk_1, ans_1], value='
+            reset_btn_1 = gr.ClearButton([media_1, query_1, msk_1, ans_1], value='🗑️ Reset')
             reset_btn_1.click(reset_seg, None, [sample_frames_1, download_btn_1])
 
             download_btn_1.render()
 
-            submit_btn_1 = gr.Button(value='
+            submit_btn_1 = gr.Button(value='🚀 Submit', variant='primary', elem_id='submit_1')
 
         with gr.Column():
             msk_1.render()
@@ -694,7 +694,7 @@ def build_demo():
     )
     with gr.Tab("Mask Understanding"):
         MU_INSTRUCTIONS = """
-### Mask Understanding
+### Mask Understanding — Instructions
 
 1. **Upload an image.**
 2. **Create a region mask**
@@ -789,7 +789,12 @@ def build_demo():
     return demo
 
 if __name__ == '__main__':
-    demo = build_demo()
+    # Warm-up: load all heavy models once at startup (dedicated GPU server)
+    get_qwen()
+    get_sam()
+    load_vq_sam2()
 
+    demo = build_demo()
     demo.queue()
-    demo.launch()
+    port = int(os.getenv("PORT", "7860"))
+    demo.launch(server_name="0.0.0.0", server_port=port)