Spaces:
Runtime error
Runtime error
File size: 1,306 Bytes
{
"hardware_name": "2xA10G",
"specs": {
"gpu_count": 2,
"gpu_type": "A10G",
"vram_per_gpu": 24,
"total_vram": 48,
"vcpu_count": 24,
"ram": 92
},
"training_optimizations": {
"per_device_batch_size": 16,
"gradient_accumulation_steps": 4,
"effective_batch_size": 128,
"memory_optimizations": {
"use_gradient_checkpointing": true,
"pin_memory": true,
"num_workers": 2
},
"distributed_settings": {
"device_map": "auto",
"ddp_find_unused_parameters": false
}
},
"memory_breakdown": {
"model_size": "~3.5GB (pre-quantized 4-bit)",
"optimizer_states": "~1GB",
"batch_memory_per_gpu": "~2GB",
"peak_memory_estimate": "18-20GB",
"safe_headroom": "4-6GB"
},
"compute_environment": "A10G_CLOUD",
"distributed_type": "DATA_PARALLEL",
"mixed_precision": "bf16",
"num_gpus": 2,
"training_parameters": {
"per_device_train_batch_size": 16,
"gradient_accumulation_steps": 4,
"dataloader_num_workers": 2,
"dataloader_pin_memory": true,
"gradient_checkpointing": true,
"max_grad_norm": 1.0
},
"memory_optimization": {
"offload_to_cpu": false,
"use_flash_attention": true,
"use_gradient_checkpointing": true
}
}