Spaces:
Sleeping
Sleeping
| { | |
| "model": { | |
| "name": "unsloth/phi-4-unsloth-bnb-4bit", | |
| "trust_remote_code": true, | |
| "use_fast_tokenizer": true | |
| }, | |
| "tokenizer": { | |
| "chat_template": "phi", | |
| "max_seq_length": 2048, | |
| "padding_side": "right", | |
| "add_eos_token": true | |
| }, | |
| "training": { | |
| "per_device_train_batch_size": 16, | |
| "gradient_accumulation_steps": 3, | |
| "learning_rate": 2e-5, | |
| "num_train_epochs": 3, | |
| "max_steps": -1, | |
| "logging_steps": 10, | |
| "save_steps": 200, | |
| "save_total_limit": 5, | |
| "push_to_hub": true, | |
| "hub_strategy": "every_save", | |
| "gradient_checkpointing": true, | |
| "optim": "adamw_torch", | |
| "lr_scheduler_type": "cosine", | |
| "warmup_ratio": 0.05, | |
| "weight_decay": 0.01, | |
| "max_grad_norm": 1.0, | |
| "neftune_noise_alpha": 5, | |
| "fp16": false, | |
| "bf16": true | |
| }, | |
| "checkpointing": { | |
| "output_dir": "./results", | |
| "save_strategy": "steps", | |
| "save_steps": 200, | |
| "save_total_limit": 5, | |
| "hub_strategy": "every_save" | |
| }, | |
| "unsloth": { | |
| "enabled": true, | |
| "r": 32, | |
| "alpha": 16, | |
| "dropout": 0, | |
| "target_modules": [ | |
| "q_proj", | |
| "k_proj", | |
| "v_proj", | |
| "o_proj", | |
| "gate_proj", | |
| "up_proj", | |
| "down_proj" | |
| ] | |
| }, | |
| "distributed_training": { | |
| "fsdp_config": { | |
| "enabled": false, | |
| "sharding_strategy": "FULL_SHARD", | |
| "mixed_precision": "BF16", | |
| "activation_checkpointing": true, | |
| "offload_params": false | |
| }, | |
| "ddp_find_unused_parameters": false, | |
| "dataloader_num_workers": 4, | |
| "ddp_config": { | |
| "enabled": true, | |
| "backend": "nccl", | |
| "find_unused_parameters": false, | |
| "broadcast_buffers": false, | |
| "gradient_as_bucket_view": true | |
| } | |
| }, | |
| "logging": { | |
| "logging_steps": 10, | |
| "log_level": "info" | |
| }, | |
| "huggingface_hub": { | |
| "push_to_hub": true, | |
| "hub_model_id": "phi-4-cognitive-assistant", | |
| "hub_private_repo": true | |
| }, | |
| "model_name_or_path": "unsloth/phi-4-unsloth-bnb-4bit", | |
| "model_revision": "main", | |
| "use_flash_attention": true, | |
| "torch_dtype": "bfloat16", | |
| "bf16": true, | |
| "fp16": false, | |
| "hardware": { | |
| "hardware_name": "4xL4", | |
| "specs": { | |
| "gpu_count": 4, | |
| "gpu_type": "L4", | |
| "vram_per_gpu": 24, | |
| "total_vram": 96, | |
| "vcpu_count": 48, | |
| "ram": 186 | |
| }, | |
| "hardware_setup": { | |
| "use_cpu": false, | |
| "num_gpus": 4, | |
| "device_map": "auto" | |
| }, | |
| "training_optimizations": { | |
| "per_device_batch_size": 16, | |
| "gradient_accumulation_steps": 3, | |
| "mixed_precision": "bf16", | |
| "torch_compile": false, | |
| "memory_optimizations": { | |
| "use_gradient_checkpointing": true, | |
| "use_flash_attention": true | |
| }, | |
| "multi_gpu_strategy": "ddp" | |
| }, | |
| "system_settings": { | |
| "cuda_memory_fraction": 0.75, | |
| "dataloader_num_workers": 4, | |
| "dataloader_pin_memory": true | |
| }, | |
| "memory_breakdown": { | |
| "model_size": "~9GB (pre-quantized 4-bit)", | |
| "optimizer_states": "~1GB", | |
| "batch_memory_per_gpu": "~3GB", | |
| "peak_memory_estimate": "~18GB", | |
| "safe_headroom": "~6GB" | |
| }, | |
| "compute_environment": "L4_CLOUD" | |
| }, | |
| "dataset": { | |
| "dataset": { | |
| "name": "George-API/phi4-cognitive-dataset", | |
| "split": "train" | |
| }, | |
| "data_formatting": { | |
| "chat_template": "phi", | |
| "conversation_structure": { | |
| "system_identifier": "[RESEARCH INTRODUCTION]", | |
| "turn_order": ["human", "assistant"] | |
| }, | |
| "roles": { | |
| "system": "System: {content}\n\n", | |
| "human": "Human: {content}\n\n", | |
| "assistant": "Assistant: {content}\n\n" | |
| } | |
| }, | |
| "data_loading": { | |
| "batch_size": 24, | |
| "shuffle": false, | |
| "sequential_processing": true, | |
| "drop_last": false, | |
| "num_workers": 4, | |
| "pin_memory": true, | |
| "prefetch_factor": 4 | |
| }, | |
| "validation": { | |
| "log_samples": 3, | |
| "log_interval": 50, | |
| "verify_sequence_integrity": true, | |
| "metrics": ["processed", "skipped", "avg_tokens", "unique_articles"] | |
| } | |
| } | |
| } |