Training in progress, step 2

Files changed (4) hide show

config.json CHANGED Viewed

@@ -6,8 +6,9 @@
     "InnerLoopARMTForCausalLM"
   ],
   "attend_to_previous_input": false,
   "base_model_config": null,
-  "base_model_name": "meta-llama/Llama-3.2-1B",
   "constant_depth": false,
   "correction": true,
   "d_mem": 64,
@@ -16,17 +17,19 @@
   "gating": false,
   "layers_attr": "model.layers",
   "max_hop": 4,
   "model_type": "armt",
   "n_heads": 1,
   "noisy_halting": false,
   "num_mem_tokens": 32,
   "segment_alignment": "left",
   "segment_size": 1024,
-  "sliding_window": true,
   "time_penalty": 0.0,
-  "transformers_version": "4.57.3",
   "use_denom": true,
-  "use_sink": true,
   "wrap_layers": null,
   "wrap_pos": false
 }

     "InnerLoopARMTForCausalLM"
   ],
   "attend_to_previous_input": false,
+  "attn_implementation": "flash_attention_2",
   "base_model_config": null,
+  "base_model_name": "google/gemma-3-1b-it",
   "constant_depth": false,
   "correction": true,
   "d_mem": 64,
   "gating": false,
   "layers_attr": "model.layers",
   "max_hop": 4,
+  "memory_dtype": "bfloat16",
+  "model_dtype": "bfloat16",
   "model_type": "armt",
   "n_heads": 1,
   "noisy_halting": false,
   "num_mem_tokens": 32,
   "segment_alignment": "left",
   "segment_size": 1024,
+  "sliding_window_enabled": false,
   "time_penalty": 0.0,
+  "transformers_version": "4.57.1",
   "use_denom": true,
+  "use_sink": false,
   "wrap_layers": null,
   "wrap_pos": false
 }

generation_config.json ADDED Viewed

+{
+  "_from_model_config": true,
+  "transformers_version": "4.57.1"
+}

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f48e9347949725410f78861ac81dde0696c91c06e313a36d4e87691d68a00fd
-size 2089174366

 version https://git-lfs.github.com/spec/v1
+oid sha256:da9279b968be4943a203b5b6a8b05d247d339718a06e8c7443909d2b4493e260
+size 8210410

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d69749efec977196b030b4491ab2ab14c887415b735deff3f21eed4c29893e2
 size 6904

 version https://git-lfs.github.com/spec/v1
+oid sha256:d626653016cda103ce802f60b910429cd963b4cffdcfd7be723eb7cc723546fb
 size 6904