# eConsult LLM service — runtime configuration.
# Environment-style key/value pairs, one assignment per line.
# Full-line '#' comments only (accepted by both dotenv loaders and configparser);
# do not add trailing comments — they would become part of the value.

# --- Model selection ---

# Primary model to load from the Hugging Face Hub.
MODEL_ID = google/medgemma-27b-text-it

# Smaller fallback model used when the primary cannot be loaded
# (e.g. insufficient VRAM) — presumably; confirm with the loading code.
MODEL_FALLBACK_ID = google/medgemma-4b-it

# Weight quantization mode applied at load time.
QUANT_MODE = 4bit

# Explicitly empty: CPU-forcing is not enabled. NOTE(review): the exact
# truthy values the consumer accepts are not visible here — verify before
# setting (e.g. 1/true).
FORCE_CPU_LLM =

# --- CUDA allocator tuning ---

# PyTorch CUDA caching-allocator options: expandable segments plus a
# 64 MB max split size to reduce fragmentation with large models.
PYTORCH_CUDA_ALLOC_CONF = expandable_segments:True,max_split_size_mb:64

# --- Hugging Face Hub / cache ---

# 1 enables accelerated downloads via the hf_transfer backend.
HF_HUB_ENABLE_HF_TRANSFER = 1

# Shared Hugging Face cache/home directory.
HF_HOME = /data/econsult/hf_cache

# Deprecated in recent transformers releases in favour of HF_HOME, but
# kept for compatibility with older versions; must mirror HF_HOME.
TRANSFORMERS_CACHE = /data/econsult/hf_cache