I fine-tuned (or rather further pretrained) the model OpenChat (a Mistral 7B fine-tune) on my own data. This worked well and inference produces nice results. Now I want to merge the adapter weights into the original model so I can quantize it in a further step. The issue is that calling model.merge_and_unload() produces the error:
"AttributeError: 'MistralForCausalLM' object has no attribute 'merge_and_unload'"
Is there a way to fix this, or another method to merge my adapter weights with the original model?
Here is my code:
Training
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
model_id = "openchat/openchat-3.5-1210"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})
from peft import prepare_model_for_kbit_training
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
from peft import LoraConfig, get_peft_model
config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, config)
data_train = ...
import transformers
from trl import SFTTrainer
from transformers import TrainingArguments
tokenizer.pad_token = tokenizer.eos_token
trainer = transformers.Trainer(
    model=model,
    train_dataset=data_train,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        #eval_steps=100,
        logging_dir="./logs",
        #max_steps=10,
        num_train_epochs=1,
        #evaluation_strategy="steps",
        logging_strategy="steps",
        learning_rate=2e-4,
        fp16=True,
        logging_steps=5,
        save_total_limit=3,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train()
trainer.save_model("pretrained_model")
tokenizer.save_pretrained("pretrained_model")
Merging
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
model_id = "openchat/openchat-3.5-1210"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
model_name = "pretrained_model"
config = PeftConfig.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model.merge_and_unload()
If you try print(model), you get a description of the model's module tree. If lora layers are mentioned there, LoRA is already applied to the model; in your merging script they are not, because AutoModelForCausalLM.from_pretrained loads only the base model and never attaches the adapter. The resulting MistralForCausalLM object does not know anything about LoRA, so you can't call merge_and_unload on it.
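For reference, a model with an adapter attached prints with the PEFT wrapper classes visible. Abridged, the tree looks roughly like this (exact names vary by peft version):
PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      ...
        (self_attn): MistralAttention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(...)
            (lora_A): ModuleDict(...)
            (lora_B): ModuleDict(...)
          )
          ...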
PeftModel does have the merge_and_unload method, so you need to load your adapter on top of the base model with PeftModel.from_pretrained and call it on the resulting object:
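A minimal sketch of the corrected merging script. It assumes the adapter was saved to "pretrained_model" as in your training code; the base model is loaded in bfloat16 rather than in 4-bit, because merging the LoRA deltas into a bitsandbytes-quantized base either fails or loses precision, depending on your peft version:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

adapter_path = "pretrained_model"

# Load the base model in half precision (not 4-bit): merging needs
# real weight matrices to add the LoRA deltas into.
base_model = AutoModelForCausalLM.from_pretrained(
    "openchat/openchat-3.5-1210",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Attach the trained adapter; this returns a PeftModel wrapper.
model = PeftModel.from_pretrained(base_model, adapter_path)

# merge_and_unload now exists: it folds the adapter weights into the
# base layers and returns a plain MistralForCausalLM.
merged_model = model.merge_and_unload()

# Save the merged model (and tokenizer) for the later quantization step.
merged_model.save_pretrained("merged_model")
tokenizer = AutoTokenizer.from_pretrained(adapter_path)
tokenizer.save_pretrained("merged_model")
After this, "merged_model" contains a standalone checkpoint with the adapter baked in, which you can quantize like any other model.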