```json
{
  "model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
  "dataset_name": "George-API/cognitive-data",
  "output_dir": "./results",
  "seed": 42,

  "# Tokenization settings": "These settings ensure we preserve existing tokenization",
  "trust_remote_code": true,
  "use_fast_tokenizer": true,
  "skip_tokenization": true,
  "max_seq_length": 2048,
  "chat_template": "chatml",

  "# Quantization settings": "4-bit quantization for memory efficiency",
  "load_in_4bit": true,
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_compute_dtype": "float16",
  "bnb_4bit_use_double_quant": true,

  "# PEFT settings": "LoRA configuration for efficient fine-tuning",
  "use_peft": true,
  "lora_r": 16,
  "lora_alpha": 32,
  "lora_dropout": 0.05,
  "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],

  "# Training parameters": "Optimized for cognitive science fine-tuning",
  "num_train_epochs": 5,
  "per_device_train_batch_size": 4,
  "gradient_accumulation_steps": 8,
  "learning_rate": 3e-5,
  "weight_decay": 0.01,
  "warmup_ratio": 0.1,
  "lr_scheduler_type": "linear",
  "logging_steps": 10,
  "save_strategy": "steps",
  "save_steps": 100,
  "save_total_limit": 3,
  "fp16": true,
  "bf16": false,
  "max_grad_norm": 0.5,

  "# Hugging Face Hub settings": "For saving and sharing the model",
  "push_to_hub": true,
  "hub_model_id": "DeepSeek-Cognitive-Science",
  "hub_private_repo": true
}
```
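JSON has no native comment syntax, so the `"# ..."` keys above act as inline documentation; a loader simply carries them along as ordinary string values. Below is a minimal sketch of how these keys could be wired into a QLoRA fine-tuning run with `transformers`, `peft`, `datasets`, and `trl`. Assumptions to note: the filename `train_config.json` is hypothetical, the `SFTTrainer` call follows the older (~0.8) `trl` API (newer releases move `tokenizer` and `max_seq_length` onto `SFTConfig`), and the dataset is assumed to expose a `text` column. The `chat_template` and `skip_tokenization` keys are custom and would have to be interpreted by the author's own training script.

```python
import json

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTTrainer

# Hypothetical filename: the JSON config above, saved as-is.
with open("train_config.json") as f:
    cfg = json.load(f)

# 4-bit NF4 quantization with double quantization (QLoRA-style loading).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=cfg["load_in_4bit"],
    bnb_4bit_quant_type=cfg["bnb_4bit_quant_type"],
    bnb_4bit_compute_dtype=getattr(torch, cfg["bnb_4bit_compute_dtype"]),
    bnb_4bit_use_double_quant=cfg["bnb_4bit_use_double_quant"],
)

tokenizer = AutoTokenizer.from_pretrained(
    cfg["model_name"],
    use_fast=cfg["use_fast_tokenizer"],
    trust_remote_code=cfg["trust_remote_code"],
)

model = AutoModelForCausalLM.from_pretrained(
    cfg["model_name"],
    quantization_config=bnb_config,
    trust_remote_code=cfg["trust_remote_code"],
    device_map="auto",
)

# LoRA adapters on all attention and MLP projection layers.
peft_config = LoraConfig(
    r=cfg["lora_r"],
    lora_alpha=cfg["lora_alpha"],
    lora_dropout=cfg["lora_dropout"],
    target_modules=cfg["target_modules"],
    task_type="CAUSAL_LM",
)

# The remaining keys map one-to-one onto TrainingArguments fields.
training_args = TrainingArguments(
    output_dir=cfg["output_dir"],
    seed=cfg["seed"],
    num_train_epochs=cfg["num_train_epochs"],
    per_device_train_batch_size=cfg["per_device_train_batch_size"],
    gradient_accumulation_steps=cfg["gradient_accumulation_steps"],
    learning_rate=cfg["learning_rate"],
    weight_decay=cfg["weight_decay"],
    warmup_ratio=cfg["warmup_ratio"],
    lr_scheduler_type=cfg["lr_scheduler_type"],
    logging_steps=cfg["logging_steps"],
    save_strategy=cfg["save_strategy"],
    save_steps=cfg["save_steps"],
    save_total_limit=cfg["save_total_limit"],
    fp16=cfg["fp16"],
    bf16=cfg["bf16"],
    max_grad_norm=cfg["max_grad_norm"],
    push_to_hub=cfg["push_to_hub"],
    hub_model_id=cfg["hub_model_id"],
    hub_private_repo=cfg["hub_private_repo"],
)

dataset = load_dataset(cfg["dataset_name"], split="train")

# Older trl API: tokenizer and max_seq_length are passed to the trainer
# directly; newer releases expect them on SFTConfig instead.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,
    tokenizer=tokenizer,
    max_seq_length=cfg["max_seq_length"],
    dataset_text_field="text",  # assumption about the dataset schema
)
trainer.train()
```

With an effective batch size of 4 × 8 = 32 (per-device batch × gradient accumulation), fp16 compute, and 4-bit NF4 weights, this configuration is sized toward fine-tuning an 8B model on a single GPU.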