irodkin committed on
Commit
7f9dad9
·
verified ·
1 Parent(s): 273705e

Training in progress, step 2

Browse files
config.json CHANGED
@@ -6,8 +6,9 @@
6
  "InnerLoopARMTForCausalLM"
7
  ],
8
  "attend_to_previous_input": false,
 
9
  "base_model_config": null,
10
- "base_model_name": "meta-llama/Llama-3.2-1B",
11
  "constant_depth": false,
12
  "correction": true,
13
  "d_mem": 64,
@@ -16,17 +17,19 @@
16
  "gating": false,
17
  "layers_attr": "model.layers",
18
  "max_hop": 4,
 
 
19
  "model_type": "armt",
20
  "n_heads": 1,
21
  "noisy_halting": false,
22
  "num_mem_tokens": 32,
23
  "segment_alignment": "left",
24
  "segment_size": 1024,
25
- "sliding_window": true,
26
  "time_penalty": 0.0,
27
- "transformers_version": "4.57.3",
28
  "use_denom": true,
29
- "use_sink": true,
30
  "wrap_layers": null,
31
  "wrap_pos": false
32
  }
 
6
  "InnerLoopARMTForCausalLM"
7
  ],
8
  "attend_to_previous_input": false,
9
+ "attn_implementation": "flash_attention_2",
10
  "base_model_config": null,
11
+ "base_model_name": "google/gemma-3-1b-it",
12
  "constant_depth": false,
13
  "correction": true,
14
  "d_mem": 64,
 
17
  "gating": false,
18
  "layers_attr": "model.layers",
19
  "max_hop": 4,
20
+ "memory_dtype": "bfloat16",
21
+ "model_dtype": "bfloat16",
22
  "model_type": "armt",
23
  "n_heads": 1,
24
  "noisy_halting": false,
25
  "num_mem_tokens": 32,
26
  "segment_alignment": "left",
27
  "segment_size": 1024,
28
+ "sliding_window_enabled": false,
29
  "time_penalty": 0.0,
30
+ "transformers_version": "4.57.1",
31
  "use_denom": true,
32
+ "use_sink": false,
33
  "wrap_layers": null,
34
  "wrap_pos": false
35
  }
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.57.1"
4
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f48e9347949725410f78861ac81dde0696c91c06e313a36d4e87691d68a00fd
3
- size 2089174366
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da9279b968be4943a203b5b6a8b05d247d339718a06e8c7443909d2b4493e260
3
+ size 8210410
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d69749efec977196b030b4491ab2ab14c887415b735deff3f21eed4c29893e2
3
  size 6904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d626653016cda103ce802f60b910429cd963b4cffdcfd7be723eb7cc723546fb
3
  size 6904