Spaces:

George-API
/

phi4training

Sleeping

phi4training / hardware_config.json

Upload folder using huggingface_hub

adb15f9 verified 9 months ago

1.45 kB

	{
	"hardware_name": "4xL4",
	"specs": {
	"gpu_count": 4,
	"gpu_type": "L4",
	"vram_per_gpu": 24,
	"total_vram": 96,
	"vcpu_count": 48,
	"ram": 186
	},
	"training_optimizations": {
	"per_device_batch_size": 24,
	"gradient_accumulation_steps": 2,
	"effective_batch_size": 192,
	"memory_optimizations": {
	"use_gradient_checkpointing": true,
	"pin_memory": true,
	"num_workers": 4,
	"use_flash_attention": true
	},
	"distributed_settings": {
	"device_map": "auto",
	"ddp_find_unused_parameters": false,
	"use_fsdp": true,
	"fsdp_config": {
	"sharding_strategy": "FULL_SHARD",
	"mixed_precision": "BF16",
	"activation_checkpointing": true
	}
	}
	},
	"memory_breakdown": {
	"model_size": "~3.5GB (pre-quantized 4-bit)",
	"optimizer_states": "~1GB",
	"batch_memory_per_gpu": "~3GB",
	"peak_memory_estimate": "~18GB",
	"safe_headroom": "~6GB"
	},
	"compute_environment": "L4_CLOUD",
	"distributed_type": "FSDP",
	"mixed_precision": "bf16",
	"num_gpus": 4,
	"training_parameters": {
	"per_device_train_batch_size": 24,
	"gradient_accumulation_steps": 2,
	"dataloader_num_workers": 4,
	"dataloader_pin_memory": true,
	"gradient_checkpointing": true,
	"max_grad_norm": 1.0
	},
	"memory_optimization": {
	"offload_to_cpu": false,
	"use_flash_attention": true,
	"use_gradient_checkpointing": true
	}
	}