ggerganov committed
Commit f500082 · unverified · Parent: 3568438

models : minor changes to the HF convert script (#157)

Files changed (1): models/convert-h5-to-ggml.py (+12 −2)

models/convert-h5-to-ggml.py
@@ -28,6 +28,7 @@ conv_map = {'self_attn_layer_norm': 'attn_ln',
             'decoder.layer_norm.weight': 'decoder.ln.weight',
             'decoder.embed_positions.weight': 'decoder.positional_embedding',
             'decoder.embed_tokens.weight': 'decoder.token_embedding.weight',
+            'proj_out.weight': 'decoder.proj.weight',
             }
 
 # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
@@ -82,8 +83,11 @@ fname_out = dir_out + "/ggml-model.bin"
 with open(dir_tokenizer + "/vocab.json", "r", encoding="utf8") as f:
     tokens = json.load(f)
 
-
+# use 16-bit or 32-bit floats
 use_f16 = True
+if len(sys.argv) > 4:
+    use_f16 = False
+    fname_out = dir_out + "/ggml-model-f32.bin"
 
 fout = open(fname_out, "wb")
 
@@ -119,6 +123,8 @@ for key in tokens:
 
 list_vars = model.state_dict()
 for name in list_vars.keys():
+    # this seems to not be used
+    # ref: https://github.com/huggingface/transformers/blob/9a5b84a0076a04fe9596da72e8668069d4f09ea0/src/transformers/models/whisper/modeling_whisper.py#L1099-L1106
     if name == "proj_out.weight":
         print('Skipping', name)
         continue
@@ -126,7 +132,11 @@ for name in list_vars.keys():
     src = name
 
     nn = name
-    nn = nn.split(".")[1:]
+    if name != "proj_out.weight":
+        nn = nn.split(".")[1:]
+    else:
+        nn = nn.split(".")
+
     if nn[1] == "layers":
         nn[1] = "blocks"
         if ".".join(nn[3:-1]) == "self_attn.k_proj":
 