Spaces:
Sleeping
Sleeping
{
  "hardware_name": "4xL4",
  "specs": {
    "gpu_count": 4,
    "gpu_type": "L4",
    "vram_per_gpu": 24,
    "total_vram": 96,
    "vcpu_count": 48,
    "ram": 186
  },
  "training_optimizations": {
    "per_device_batch_size": 24,
    "gradient_accumulation_steps": 2,
    "effective_batch_size": 192,
    "memory_optimizations": {
      "use_gradient_checkpointing": true,
      "pin_memory": true,
      "num_workers": 4,
      "use_flash_attention": true
    },
    "distributed_settings": {
      "device_map": "auto",
      "ddp_find_unused_parameters": false,
      "use_fsdp": true,
      "fsdp_config": {
        "sharding_strategy": "FULL_SHARD",
        "mixed_precision": "BF16",
        "activation_checkpointing": true
      }
    }
  },
  "memory_breakdown": {
    "model_size": "~3.5GB (pre-quantized 4-bit)",
    "optimizer_states": "~1GB",
    "batch_memory_per_gpu": "~3GB",
    "peak_memory_estimate": "~18GB",
    "safe_headroom": "~6GB"
  },
  "compute_environment": "L4_CLOUD",
  "distributed_type": "FSDP",
  "mixed_precision": "bf16",
  "num_gpus": 4,
  "training_parameters": {
    "per_device_train_batch_size": 24,
    "gradient_accumulation_steps": 2,
    "dataloader_num_workers": 4,
    "dataloader_pin_memory": true,
    "gradient_checkpointing": true,
    "max_grad_norm": 1.0
  },
  "memory_optimization": {
    "offload_to_cpu": false,
    "use_flash_attention": true,
    "use_gradient_checkpointing": true
  }
}