Spaces:

George-API
/

phi4training

Sleeping

phi4training / hardware_config.json

Upload folder using huggingface_hub

a57357b verified 12 months ago

1.31 kB

	{
		"hardware_name": "2xA10G",
		"specs": {
			"gpu_count": 2,
			"gpu_type": "A10G",
			"vram_per_gpu": 24,
			"total_vram": 48,
			"vcpu_count": 24,
			"ram": 92
		},
		"training_optimizations": {
			"per_device_batch_size": 16,
			"gradient_accumulation_steps": 4,
			"effective_batch_size": 128,
			"memory_optimizations": {
				"use_gradient_checkpointing": true,
				"pin_memory": true,
				"num_workers": 2
			},
			"distributed_settings": {
				"device_map": "auto",
				"ddp_find_unused_parameters": false
			}
		},
		"memory_breakdown": {
			"model_size": "~3.5GB (pre-quantized 4-bit)",
			"optimizer_states": "~1GB",
			"batch_memory_per_gpu": "~2GB",
			"peak_memory_estimate": "18-20GB",
			"safe_headroom": "4-6GB"
		},
		"compute_environment": "A10G_CLOUD",
		"distributed_type": "DATA_PARALLEL",
		"mixed_precision": "bf16",
		"num_gpus": 2,
		"training_parameters": {
			"per_device_train_batch_size": 16,
			"gradient_accumulation_steps": 4,
			"dataloader_num_workers": 2,
			"dataloader_pin_memory": true,
			"gradient_checkpointing": true,
			"max_grad_norm": 1.0
		},
		"memory_optimization": {
			"offload_to_cpu": false,
			"use_flash_attention": true,
			"use_gradient_checkpointing": true
		}
	}