models : minor changes to the HF convert script (#157)
models/convert-h5-to-ggml.py (+12, -2)
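The main functional change is an optional 32-bit float output mode; the remaining edits adjust how tensor names are pre-processed. Below is a minimal standalone sketch of the new precision switch (the full diff follows). It assumes sys.argv[3] is the output directory, as suggested by the dir_out / fname_out hunk context; the other positional arguments are not visible in this diff.

```python
# Minimal standalone sketch of the new precision switch (not the full script).
# Assumption: sys.argv[3] is the output directory ("dir_out" in the hunk context).
import sys

dir_out   = sys.argv[3] if len(sys.argv) > 3 else "."   # fallback only for illustration
fname_out = dir_out + "/ggml-model.bin"

# use 16-bit or 32-bit floats
use_f16 = True
if len(sys.argv) > 4:            # any extra argument selects 32-bit output
    use_f16 = False
    fname_out = dir_out + "/ggml-model-f32.bin"

print("output:", fname_out, "f16:", use_f16)
```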
@@ -28,6 +28,7 @@ conv_map = {'self_attn_layer_norm': 'attn_ln',
         'decoder.layer_norm.weight': 'decoder.ln.weight',
         'decoder.embed_positions.weight': 'decoder.positional_embedding',
         'decoder.embed_tokens.weight': 'decoder.token_embedding.weight',
+        'proj_out.weight': 'decoder.proj.weight',
         }

 # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
@@ -82,8 +83,11 @@ fname_out = dir_out + "/ggml-model.bin"
 with open(dir_tokenizer + "/vocab.json", "r", encoding="utf8") as f:
     tokens = json.load(f)

-
+# use 16-bit or 32-bit floats
 use_f16 = True
+if len(sys.argv) > 4:
+    use_f16 = False
+    fname_out = dir_out + "/ggml-model-f32.bin"

 fout = open(fname_out, "wb")

@@ -119,6 +123,8 @@ for key in tokens:

 list_vars = model.state_dict()
 for name in list_vars.keys():
+    # this seems to not be used
+    # ref: https://github.com/huggingface/transformers/blob/9a5b84a0076a04fe9596da72e8668069d4f09ea0/src/transformers/models/whisper/modeling_whisper.py#L1099-L1106
     if name == "proj_out.weight":
         print('Skipping', name)
         continue
@@ -126,7 +132,11 @@ for name in list_vars.keys():
     src = name

     nn = name
-    nn = nn.split(".")[1:]
+    if name != "proj_out.weight":
+        nn = nn.split(".")[1:]
+    else:
+        nn = nn.split(".")
+
     if nn[1] == "layers":
         nn[1] = "blocks"
     if ".".join(nn[3:-1]) == "self_attn.k_proj":
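For illustration, here is a minimal sketch of the tensor-name splitting introduced in the last hunk. The example checkpoint keys are assumed typical HF Whisper state_dict names and are not taken from this diff.

```python
# Minimal sketch of the name splitting added in the last hunk:
# strip the leading "model." component for every tensor except "proj_out.weight".
def split_name(name: str) -> list:
    if name != "proj_out.weight":
        return name.split(".")[1:]   # "model.decoder.layers.0...." -> ["decoder", "layers", "0", ...]
    return name.split(".")           # keep all components for the output projection

# Example keys (assumed HF Whisper naming, for illustration only):
print(split_name("model.decoder.layers.0.self_attn.k_proj.weight"))
print(split_name("proj_out.weight"))
```

In the script itself, "proj_out.weight" is still skipped earlier in the loop (the added comment suggests it is unused and points to the referenced modeling_whisper.py lines), so the else branch appears to act mainly as a safeguard.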