binwang777
committed on
Commit
·
3136022
1
Parent(s):
e7e8b04
update model name
Browse files- README.md +9 -4
- config.json +1 -1
- rzen_embed_inference.py +2 -2
README.md
CHANGED
|
@@ -2,6 +2,11 @@
|
|
| 2 |
|
| 3 |
RzenEmbed-v2-7B is a multimodal embedding model developed and open-sourced by 360CVGroup. It achieves state-of-the-art (SOTA) results on the MMEB-V2, MMEB-Visdoc, and MMEB-Video benchmarks (as of September 29, 2025).
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
### MMEB-V2
|
| 6 |
|
| 7 |
| Model | Model Size (B) | Overall | Image-Overall | Video-Overall | Visdoc-Overall |
|
|
@@ -63,7 +68,7 @@ Retrieve images that match text captions.
|
|
| 63 |
```python
|
| 64 |
from rzen_embed_inference import RzenEmbed
|
| 65 |
|
| 66 |
-
rzen = RzenEmbed("
|
| 67 |
|
| 68 |
queries = [
|
| 69 |
"A curious kitten and a gentle puppy share a moment of connection on the grass.",
|
|
@@ -93,7 +98,7 @@ Find text captions that best match given images.
|
|
| 93 |
```python
|
| 94 |
from rzen_embed_inference import RzenEmbed
|
| 95 |
|
| 96 |
-
rzen = RzenEmbed("
|
| 97 |
|
| 98 |
queries = [
|
| 99 |
"assets/example1.jpg",
|
|
@@ -121,7 +126,7 @@ Match text queries with document images for information retrieval.
|
|
| 121 |
```python
|
| 122 |
from rzen_embed_inference import RzenEmbed
|
| 123 |
|
| 124 |
-
rzen = RzenEmbed("
|
| 125 |
|
| 126 |
queries = [
|
| 127 |
"What is the main variable being analyzed on the x-axis of these graphs?",
|
|
@@ -168,7 +173,7 @@ def extract_frames(video_path, num_frames):
|
|
| 168 |
cap.release()
|
| 169 |
return frames
|
| 170 |
|
| 171 |
-
rzen = RzenEmbed("
|
| 172 |
|
| 173 |
queries = [
|
| 174 |
"A traditional boat glides along a river lined with blooming cherry blossoms under an overcast sky in a modern cityscape.",
|
|
|
|
| 2 |
|
| 3 |
RzenEmbed-v2-7B is a multimodal embedding model developed and open-sourced by 360CVGroup. It achieves state-of-the-art (SOTA) results on the MMEB-V2, MMEB-Visdoc, and MMEB-Video benchmarks (as of September 29, 2025).
|
| 4 |
|
| 5 |
+
|
| 6 |
+
[](https://arxiv.org/abs/2510.27350)
|
| 7 |
+
[](https://github.com/360CVGroup/RzenEmbed)
|
| 8 |
+
[](https://huggingface.co/spaces/TIGER-Lab/MMEB-Leaderboard)
|
| 9 |
+
|
| 10 |
### MMEB-V2
|
| 11 |
|
| 12 |
| Model | Model Size (B) | Overall | Image-Overall | Video-Overall | Visdoc-Overall |
|
|
|
|
| 68 |
```python
|
| 69 |
from rzen_embed_inference import RzenEmbed
|
| 70 |
|
| 71 |
+
rzen = RzenEmbed("qihoo360/RzenEmbed")
|
| 72 |
|
| 73 |
queries = [
|
| 74 |
"A curious kitten and a gentle puppy share a moment of connection on the grass.",
|
|
|
|
| 98 |
```python
|
| 99 |
from rzen_embed_inference import RzenEmbed
|
| 100 |
|
| 101 |
+
rzen = RzenEmbed("qihoo360/RzenEmbed")
|
| 102 |
|
| 103 |
queries = [
|
| 104 |
"assets/example1.jpg",
|
|
|
|
| 126 |
```python
|
| 127 |
from rzen_embed_inference import RzenEmbed
|
| 128 |
|
| 129 |
+
rzen = RzenEmbed("qihoo360/RzenEmbed")
|
| 130 |
|
| 131 |
queries = [
|
| 132 |
"What is the main variable being analyzed on the x-axis of these graphs?",
|
|
|
|
| 173 |
cap.release()
|
| 174 |
return frames
|
| 175 |
|
| 176 |
+
rzen = RzenEmbed("qihoo360/RzenEmbed")
|
| 177 |
|
| 178 |
queries = [
|
| 179 |
"A traditional boat glides along a river lined with blooming cherry blossoms under an overcast sky in a modern cityscape.",
|
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "
|
| 3 |
"architectures": [
|
| 4 |
"Qwen2VLForConditionalGeneration"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "qihoo360/RzenEmbed",
|
| 3 |
"architectures": [
|
| 4 |
"Qwen2VLForConditionalGeneration"
|
| 5 |
],
|
rzen_embed_inference.py
CHANGED
|
@@ -17,7 +17,7 @@ from transformers.models.qwen2_vl import Qwen2VLForConditionalGeneration
|
|
| 17 |
class RzenEmbed(nn.Module):
|
| 18 |
def __init__(
|
| 19 |
self,
|
| 20 |
-
model_name: str = "
|
| 21 |
model_path: Optional[str] = None,
|
| 22 |
device: str = "cuda" if torch.cuda.is_available() else "cpu",
|
| 23 |
min_image_tokens=256,
|
|
@@ -345,7 +345,7 @@ def fetch_image(image: str | Image.Image, size_factor: int = IMAGE_FACTOR) -> Im
|
|
| 345 |
|
| 346 |
|
| 347 |
if __name__ == '__main__':
|
| 348 |
-
rzen = RzenEmbed("
|
| 349 |
|
| 350 |
queries = [
|
| 351 |
"A curious kitten and a gentle puppy share a moment of connection on the grass.",
|
|
|
|
| 17 |
class RzenEmbed(nn.Module):
|
| 18 |
def __init__(
|
| 19 |
self,
|
| 20 |
+
model_name: str = "qihoo360/RzenEmbed",
|
| 21 |
model_path: Optional[str] = None,
|
| 22 |
device: str = "cuda" if torch.cuda.is_available() else "cpu",
|
| 23 |
min_image_tokens=256,
|
|
|
|
| 345 |
|
| 346 |
|
| 347 |
if __name__ == '__main__':
|
| 348 |
+
rzen = RzenEmbed("qihoo360/RzenEmbed")
|
| 349 |
|
| 350 |
queries = [
|
| 351 |
"A curious kitten and a gentle puppy share a moment of connection on the grass.",
|