lihongjie committed on
Commit
b88b51a
·
1 Parent(s): 7c180e2
README.md CHANGED
@@ -74,7 +74,7 @@ pip install -r requirements.txt
74
  ##### start tokenizer server for image understand demo
75
 
76
  ```
77
- python3 tokenizer_images.py --port 8080
78
  ```
79
 
80
  ##### run image understand demo
@@ -148,7 +148,7 @@ image >> images/recoAll_attractions_1.jpg
148
  ##### start tokenizer server for image understand demo
149
 
150
  ```
151
- python tokenizer_video.py --port 8080
152
  ```
153
 
154
  ##### run video understand demo
 
74
  ##### start tokenizer server for image understand demo
75
 
76
  ```
77
+ python3 qwen3_tokenizer.py --port 8080
78
  ```
79
 
80
  ##### run image understand demo
 
148
  ##### start tokenizer server for image understand demo
149
 
150
  ```
151
+ python qwen3_tokenizer.py --port 8080
152
  ```
153
 
154
  ##### run video understand demo
main_ax650 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b9b7a1dd91f4b183324d86f537c1530a075bbb9d551cc8fc24d0158e9d513e1
3
- size 6660400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b33248b5bb595b58df9abb46c5e955d3ee8194989d728b942cd6d0691b071d3c
3
+ size 6651808
main_axcl_aarch64 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1f164f7a699a2973e116bcba3788c7649de2effe693127572c1ae0d3cfa36cd
3
- size 1794808
 
 
 
 
main_axcl_x86 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bfb7b9096a12a39ce1e6fe55f0f03a2565f46bf7d736601ca9bff358015b559
3
- size 1886480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cbb29839971a6851ee517b46a23172f81ca791b7344bd9ec0f2b4c31916d10b
3
+ size 1902816
tokenizer_video.py → qwen3_tokenizer.py RENAMED
@@ -71,7 +71,7 @@ def get_image_prompt_string(
71
  class Tokenizer_Http():
72
 
73
  def __init__(self):
74
-
75
  path = 'qwen3-vl-tokenizer'
76
  self.tokenizer = AutoTokenizer.from_pretrained(path,
77
  trust_remote_code=True,
@@ -82,10 +82,14 @@ class Tokenizer_Http():
82
  input_ids = self.tokenizer(text)
83
  return input_ids["input_ids"][0]
84
 
85
- def encode_vpm(self, content="Describe this image.", num_img=1, img_token_num=256):
86
 
87
  # official implementation
88
- imgs_token = '<|vision_start|>' + '<|video_pad|>'*img_token_num*num_img + '<|vision_end|>'
 
 
 
 
89
 
90
  text = f'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{imgs_token}{content}<|im_end|>\n<|im_start|>assistant\n'
91
 
@@ -95,8 +99,18 @@ class Tokenizer_Http():
95
  return text_inputs["input_ids"].tolist()[0]
96
 
97
  def decode(self, token_ids):
98
- return self.tokenizer.decode(token_ids,
99
- clean_up_tokenization_spaces=False)
 
 
 
 
 
 
 
 
 
 
100
 
101
  @property
102
  def bos_id(self):
@@ -120,6 +134,10 @@ class Tokenizer_Http():
120
 
121
  @property
122
  def img_context_token(self):
 
 
 
 
123
  return self.tokenizer.encode("<|video_pad|>")[0]
124
 
125
  tokenizer = Tokenizer_Http()
@@ -180,6 +198,12 @@ class Request(BaseHTTPRequestHandler):
180
  msg = json.dumps({'img_context_token': -1})
181
  else:
182
  msg = json.dumps({'img_context_token': img_context_token})
 
 
 
 
 
 
183
  else:
184
  msg = 'error'
185
 
@@ -206,7 +230,7 @@ class Request(BaseHTTPRequestHandler):
206
  if 'img_prompt' in req:
207
  b_img_prompt = req['img_prompt']
208
  if b_img_prompt:
209
- token_ids = tokenizer.encode_vpm(prompt, req["num_img"], req["img_token_num"])
210
  else:
211
  token_ids = tokenizer.encode(prompt)
212
 
 
71
  class Tokenizer_Http():
72
 
73
  def __init__(self):
74
+ self.token_ids_cache = []
75
  path = 'qwen3-vl-tokenizer'
76
  self.tokenizer = AutoTokenizer.from_pretrained(path,
77
  trust_remote_code=True,
 
82
  input_ids = self.tokenizer(text)
83
  return input_ids["input_ids"][0]
84
 
85
+ def encode_vpm(self, content="Describe this image.", num_img=1, img_token_num=256, video_prompt=False):
86
 
87
  # official implementation
88
+ if video_prompt:
89
+ pad_token = '<|video_pad|>'
90
+ else:
91
+ pad_token = '<|image_pad|>'
92
+ imgs_token = '<|vision_start|>' + pad_token*img_token_num*num_img + '<|vision_end|>'
93
 
94
  text = f'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{imgs_token}{content}<|im_end|>\n<|im_start|>assistant\n'
95
 
 
99
  return text_inputs["input_ids"].tolist()[0]
100
 
101
  def decode(self, token_ids):
102
+ self.token_ids_cache += token_ids
103
+ text = self.tokenizer.decode(self.token_ids_cache)
104
+ if "\ufffd" in text and len(self.token_ids_cache) < 9:
105
+ print("text 中包含非法字符")
106
+ return ""
107
+ else:
108
+ self.token_ids_cache.clear()
109
+ return text.replace("\ufffd","")
110
+
111
+ # def decode(self, token_ids):
112
+ # return self.tokenizer.decode(token_ids,
113
+ # clean_up_tokenization_spaces=False)
114
 
115
  @property
116
  def bos_id(self):
 
134
 
135
  @property
136
  def img_context_token(self):
137
+ return self.tokenizer.encode("<|image_pad|>")[0]
138
+
139
+ @property
140
+ def video_context_token(self):
141
  return self.tokenizer.encode("<|video_pad|>")[0]
142
 
143
  tokenizer = Tokenizer_Http()
 
198
  msg = json.dumps({'img_context_token': -1})
199
  else:
200
  msg = json.dumps({'img_context_token': img_context_token})
201
+ elif self.path == '/video_context_token':
202
+ video_context_token = tokenizer.video_context_token
203
+ if video_context_token is None:
204
+ msg = json.dumps({'video_context_token': -1})
205
+ else:
206
+ msg = json.dumps({'video_context_token': video_context_token})
207
  else:
208
  msg = 'error'
209
 
 
230
  if 'img_prompt' in req:
231
  b_img_prompt = req['img_prompt']
232
  if b_img_prompt:
233
+ token_ids = tokenizer.encode_vpm(prompt, req["num_img"], req["img_token_num"], req["video_prompt"])
234
  else:
235
  token_ids = tokenizer.encode(prompt)
236
 
tokenizer_images.py DELETED
@@ -1,244 +0,0 @@
1
- from transformers import AutoTokenizer, PreTrainedTokenizerFast
2
- from transformers.tokenization_utils_base import AddedToken
3
- from http.server import HTTPServer, BaseHTTPRequestHandler
4
- import json
5
- import argparse
6
-
7
- def _prompt_split_image(
8
- image_seq_len,
9
- image_rows,
10
- image_cols,
11
- fake_token_around_image,
12
- image_token,
13
- global_img_token,
14
- ):
15
- """Prompt with expanded image tokens for when the image is split into patches."""
16
- text_split_images = ""
17
- for n_h in range(image_rows):
18
- for n_w in range(image_cols):
19
- text_split_images += (
20
- f"{fake_token_around_image}"
21
- + f"<row_{n_h + 1}_col_{n_w + 1}>"
22
- + f"{image_token}" * image_seq_len
23
- )
24
- text_split_images += "\n"
25
-
26
- text_split_images += (
27
- f"\n{fake_token_around_image}"
28
- + f"{global_img_token}"
29
- + f"{image_token}" * image_seq_len
30
- + f"{fake_token_around_image}"
31
- )
32
- return text_split_images
33
-
34
-
35
- def _prompt_single_image(
36
- image_seq_len, fake_token_around_image, image_token, global_img_token
37
- ):
38
- """Prompt with expanded image tokens for a single image."""
39
- return (
40
- f"{fake_token_around_image}"
41
- + f"{global_img_token}"
42
- + f"{image_token}" * image_seq_len
43
- + f"{fake_token_around_image}"
44
- )
45
-
46
-
47
- def get_image_prompt_string(
48
- image_rows,
49
- image_cols,
50
- image_seq_len,
51
- fake_token_around_image,
52
- image_token,
53
- global_img_token,
54
- ):
55
- if image_rows == 0 and image_cols == 0:
56
- return _prompt_single_image(
57
- image_seq_len,
58
- fake_token_around_image=fake_token_around_image,
59
- image_token=image_token,
60
- global_img_token=global_img_token,
61
- )
62
- return _prompt_split_image(
63
- image_seq_len,
64
- image_rows,
65
- image_cols,
66
- fake_token_around_image,
67
- image_token,
68
- global_img_token,
69
- )
70
-
71
- class Tokenizer_Http():
72
-
73
- def __init__(self):
74
-
75
- path = 'qwen3-vl-tokenizer'
76
- self.tokenizer = AutoTokenizer.from_pretrained(path,
77
- trust_remote_code=True,
78
- use_fast=False)
79
-
80
- def encode(self, content):
81
- text = [f'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n']
82
- input_ids = self.tokenizer(text)
83
- return input_ids["input_ids"][0]
84
-
85
- def encode_vpm(self, content="Describe this image.", num_img=1, img_token_num=256):
86
-
87
- # official implementation
88
- imgs_token = '<|vision_start|>' + '<|image_pad|>'*img_token_num + '<|vision_end|>'
89
- imgs_token *= num_img
90
- text = f'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{imgs_token}{content}<|im_end|>\n<|im_start|>assistant\n'
91
-
92
- output_kwargs = {'text_kwargs': {'padding': True, 'return_tensors': 'pt'}, 'images_kwargs': {'return_tensors': 'pt'}, 'audio_kwargs': {'padding': True, 'return_tensors': 'pt'}, 'videos_kwargs': {'fps': 2.0, 'return_tensors': 'pt'}, 'common_kwargs': {'return_tensors': 'pt'}}
93
-
94
- text_inputs = self.tokenizer(text, **output_kwargs["text_kwargs"])
95
- return text_inputs["input_ids"].tolist()[0]
96
-
97
- def decode(self, token_ids):
98
- return self.tokenizer.decode(token_ids,
99
- clean_up_tokenization_spaces=False)
100
-
101
- @property
102
- def bos_id(self):
103
- return self.tokenizer.bos_token_id
104
-
105
- @property
106
- def eos_id(self):
107
- return self.tokenizer.eos_token_id
108
-
109
- @property
110
- def bos_token(self):
111
- return self.tokenizer.bos_token
112
-
113
- @property
114
- def eos_token(self):
115
- return self.tokenizer.eos_token
116
-
117
- @property
118
- def img_start_token(self):
119
- return self.tokenizer.encode("<|vision_start|>")[0]
120
-
121
- @property
122
- def img_context_token(self):
123
- return self.tokenizer.encode("<|image_pad|>")[0]
124
-
125
- tokenizer = Tokenizer_Http()
126
-
127
- print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id,
128
- tokenizer.eos_token)
129
- token_ids = tokenizer.encode_vpm()
130
- # [151644, 8948, 198, 56568, 104625, 100633, 104455, 104800, 101101, 32022, 102022, 99602, 100013, 9370, 90286, 21287, 42140, 53772, 35243, 26288, 104949, 3837, 105205, 109641, 67916, 30698, 11, 54851, 46944, 115404, 42192, 99441, 100623, 48692, 100168, 110498, 1773, 151645, 151644, 872, 198,
131
- # 151646,
132
- # 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648,
133
- # 151647,
134
- # 198, 5501, 7512, 279, 2168, 19620, 13, 151645, 151644, 77091, 198]
135
- # 118
136
- print(token_ids)
137
- print(len(token_ids))
138
- token_ids = tokenizer.encode("hello world")
139
- # [151644, 8948, 198, 56568, 104625, 100633, 104455, 104800, 101101, 32022, 102022, 99602, 100013, 9370, 90286, 21287, 42140, 53772, 35243, 26288, 104949, 3837, 105205, 109641, 67916, 30698, 11, 54851, 46944, 115404, 42192, 99441, 100623, 48692, 100168, 110498, 1773, 151645, 151644, 872, 198, 14990, 1879, 151645, 151644, 77091, 198]
140
- # 47
141
- print(token_ids)
142
- print(len(token_ids))
143
-
144
-
145
- class Request(BaseHTTPRequestHandler):
146
- #通过类继承,新定义类
147
- timeout = 5
148
- server_version = 'Apache'
149
-
150
- def do_GET(self):
151
- print(self.path)
152
- #在新类中定义get的内容(当客户端向该服务端使用get请求时,本服务端将如下运行)
153
- self.send_response(200)
154
- self.send_header("type", "get") #设置响应头,可省略或设置多个
155
- self.end_headers()
156
-
157
- if self.path == '/bos_id':
158
- bos_id = tokenizer.bos_id
159
- # print(bos_id)
160
- # to json
161
- if bos_id is None:
162
- msg = json.dumps({'bos_id': -1})
163
- else:
164
- msg = json.dumps({'bos_id': bos_id})
165
- elif self.path == '/eos_id':
166
- eos_id = tokenizer.eos_id
167
- if eos_id is None:
168
- msg = json.dumps({'eos_id': -1})
169
- else:
170
- msg = json.dumps({'eos_id': eos_id})
171
- elif self.path == '/img_start_token':
172
- img_start_token = tokenizer.img_start_token
173
- if img_start_token is None:
174
- msg = json.dumps({'img_start_token': -1})
175
- else:
176
- msg = json.dumps({'img_start_token': img_start_token})
177
- elif self.path == '/img_context_token':
178
- img_context_token = tokenizer.img_context_token
179
- if img_context_token is None:
180
- msg = json.dumps({'img_context_token': -1})
181
- else:
182
- msg = json.dumps({'img_context_token': img_context_token})
183
- else:
184
- msg = 'error'
185
-
186
- print(msg)
187
- msg = str(msg).encode() #转为str再转为byte格式
188
-
189
- self.wfile.write(msg) #将byte格式的信息返回给客户端
190
-
191
- def do_POST(self):
192
- #在新类中定义post的内容(当客户端向该服务端使用post请求时,本服务端将如下运行)
193
- data = self.rfile.read(int(
194
- self.headers['content-length'])) #获取从客户端传入的参数(byte格式)
195
- data = data.decode() #将byte格式转为str格式
196
-
197
- self.send_response(200)
198
- self.send_header("type", "post") #设置响应头,可省略或设置多个
199
- self.end_headers()
200
-
201
- if self.path == '/encode':
202
- req = json.loads(data)
203
- print(req)
204
- prompt = req['text']
205
- b_img_prompt = False
206
- if 'img_prompt' in req:
207
- b_img_prompt = req['img_prompt']
208
- if b_img_prompt:
209
- token_ids = tokenizer.encode_vpm(prompt, req["num_img"], req["img_token_num"])
210
- else:
211
- token_ids = tokenizer.encode(prompt)
212
-
213
- if token_ids is None:
214
- msg = json.dumps({'token_ids': -1})
215
- else:
216
- msg = json.dumps({'token_ids': token_ids})
217
-
218
- elif self.path == '/decode':
219
- req = json.loads(data)
220
- token_ids = req['token_ids']
221
- text = tokenizer.decode(token_ids)
222
- if text is None:
223
- msg = json.dumps({'text': ""})
224
- else:
225
- msg = json.dumps({'text': text})
226
- else:
227
- msg = 'error'
228
- print(msg)
229
- msg = str(msg).encode() #转为str再转为byte格式
230
-
231
- self.wfile.write(msg) #将byte格式的信息返回给客户端
232
-
233
-
234
- if __name__ == "__main__":
235
-
236
- args = argparse.ArgumentParser()
237
- args.add_argument('--host', type=str, default='localhost')
238
- args.add_argument('--port', type=int, default=8080)
239
- args = args.parse_args()
240
-
241
- host = (args.host, args.port) #设定地址与端口号,'localhost'等价于'127.0.0.1'
242
- print('http://%s:%s' % host)
243
- server = HTTPServer(host, Request) #根据地址端口号和新定义的类,创建服务器实例
244
- server.serve_forever() #开启服务