```python

import os

import torch
from peft import PeftModel
from transformers import AutoProcessor, AutoModelForCausalLM

def create_model(model_name_or_path: str = 'microsoft/Phi-4-multimodal-instruct'):
    """Load a causal-LM checkpoint through ``AutoModelForCausalLM``.

    The checkpoint is loaded with ``torch.bfloat16`` weights,
    ``device_map="auto"`` and the ``sdpa`` attention implementation.

    Args:
        model_name_or_path: Model identifier or path handed to
            ``from_pretrained``.

    Returns:
        The object returned by ``AutoModelForCausalLM.from_pretrained``.
    """
    # os.getenv yields None when CACHE_DIR is unset; the value is passed
    # through unchanged as ``cache_dir``.
    cache_dir = os.getenv('CACHE_DIR')

    return AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        device_map="auto",
        # NOTE: trust_remote_code lets the checkpoint repository supply code
        # that runs locally; only point this at repositories you trust.
        trust_remote_code=True,
        cache_dir=cache_dir,
        # NOTE(review): the underscore-prefixed spelling is kept from the
        # original call; confirm it is the kwarg this model's code expects.
        _attn_implementation='sdpa',
        torch_dtype=torch.bfloat16,
    )

# Checkpoint identifier shared by the model and the processor below.
model_name_or_path = 'microsoft/Phi-4-multimodal-instruct'
model = create_model(model_name_or_path)

# Attach the adapter weights from "binhquoc/alm-add-phi4-non" under the
# adapter name "speech".
model.load_adapter("binhquoc/alm-add-phi4-non", adapter_name="speech")

# Select the adapter registered above as "speech".
model.set_adapter("speech")
# Load the processor from the same checkpoint identifier as the model;
# cache_dir comes from the CACHE_DIR environment variable (None when unset).
processor = AutoProcessor.from_pretrained(
    model_name_or_path,
    trust_remote_code=True,
    cache_dir = os.getenv('CACHE_DIR'),
)
```