Add LoRA support #820
Merged
Commits (15), all by slaren:

f52101e  Add lora support
ac3fbe4  Export lora A matrix pre-transposed
7136ada  Add support for quantized models
dc65707  Use the work buffer instead to fix MSVC build
87c518b  Update exporter and support scaling
c920f00  Add compatibility with #801
c45868b  Support more layer types, fix memory and generation issues
57627f0  Rebase to master
c150e1b  Add support for using a different base model
fc89916  Fix windows build
14858ba  Show warning when using a quantized base model
3df343b  ggml_cpy: use the work buffer instead of alloca when quantizing
63da54e  Only attempt to use mmap for the lora base model if it is supported
0a6d5ad  Reuse definitions from convert.py
8d37db3  ggml_add: Add more checks
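
Two of the commits above concern how the exporter lays out the adapter: the LoRA A matrix is written pre-transposed, and the r / lora_alpha values are recorded so the loader can apply the usual scaling. As a reminder of what a PEFT LoRA adapter does to a base weight, here is a minimal numpy sketch; the shapes and values are illustrative, not taken from the PR, and the alpha/r scaling is the standard PEFT convention rather than something shown in this diff.

import numpy as np

# Illustrative values only; r, alpha and the matrix sizes are made up.
n_out, n_in, r, alpha = 4096, 4096, 8, 16

W      = np.zeros((n_out, n_in), dtype=np.float32)  # base weight
lora_A = np.zeros((r, n_in), dtype=np.float32)      # PEFT lora_A.weight
lora_B = np.zeros((n_out, r), dtype=np.float32)     # PEFT lora_B.weight

scaling = alpha / r                                  # standard PEFT scaling
W_merged = W + scaling * (lora_B @ lora_A)           # adapted weight

# The exporter below writes lora_A transposed (v = v.T), presumably so the
# ggml side does not have to transpose it again at load time.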
New file added by this PR (124 lines): a script that converts a HuggingFace PEFT LoRA adapter ('adapter_config.json' + 'adapter_model.bin') into a single ggml adapter file ('ggml-adapter-model.bin').
import json
import os
import re
import struct
import sys
from typing import Any, BinaryIO, Dict, Sequence

import torch

from convert import DATA_TYPE_TO_FTYPE, NUMPY_TYPE_TO_DATA_TYPE, DataType

HF_SUBLAYER_TO_GGML = {
    "self_attn.q_proj": "attention.wq",
    "self_attn.k_proj": "attention.wk",
    "self_attn.v_proj": "attention.wv",
    "self_attn.o_proj": "attention.wo",
    "mlp.gate_proj": "feed_forward.w1",
    "mlp.down_proj": "feed_forward.w2",
    "mlp.up_proj": "feed_forward.w3",
    "input_layernorm": "attention_norm",
    "post_attention_layernorm": "ffn_norm",
    # "norm": "norm",
    # "embed_tokens": "tok_embeddings",
    # "lm_head": "output",
}


def translate_tensor_name(t: str) -> str:
    match = re.match(r".*layers\.(\d+)\.(\w+\.\w+)\.lora_(A|B)\.weight", t)
    if match:
        nn = match.group(1)
        sub_layer = match.group(2)
        lora_type = match.group(3)

        sub_layer_renamed = HF_SUBLAYER_TO_GGML.get(sub_layer)
        if sub_layer_renamed is None:
            print(f"Error: unrecognized sub-layer {sub_layer} in tensor {t}")
            sys.exit(1)

        output_string = f"layers.{nn}.{sub_layer_renamed}.weight.lora{lora_type}"
        return output_string
    else:
        print(f"Error: unrecognized tensor {t}")
        sys.exit(1)


def write_file_header(fout: BinaryIO, params: Dict[str, Any]) -> None:
    fout.write(b"ggla"[::-1])  # magic (ggml lora)
    fout.write(struct.pack("i", 1))  # file version
    fout.write(struct.pack("ii", params["r"], params["lora_alpha"]))


def write_tensor_header(
    fout: BinaryIO, name: str, shape: Sequence[int], data_type: DataType
) -> None:
    sname = name.encode("utf-8")
    fout.write(
        struct.pack(
            "iii",
            len(shape),
            len(sname),
            DATA_TYPE_TO_FTYPE[NUMPY_TYPE_TO_DATA_TYPE[data_type]],
        )
    )
    fout.write(struct.pack("i" * len(shape), *shape[::-1]))
    fout.write(sname)
    # pad to the next 32-byte boundary so the tensor data that follows is aligned
    fout.seek((fout.tell() + 31) & -32)


if len(sys.argv) != 2:
    print(f"Usage: python {sys.argv[0]} <path>")
    print(
        "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
    )
    sys.exit(1)

input_json = os.path.join(sys.argv[1], "adapter_config.json")
input_model = os.path.join(sys.argv[1], "adapter_model.bin")
output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")

model = torch.load(input_model, map_location="cpu")

with open(input_json, "r") as f:
    params = json.load(f)

if params["peft_type"] != "LORA":
    print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
    sys.exit(1)

if params["fan_in_fan_out"] is True:
    print("Error: param fan_in_fan_out is not supported")
    sys.exit(1)

if params["bias"] is not None and params["bias"] != "none":
    print("Error: param bias is not supported")
    sys.exit(1)

# TODO: these seem to be layers that have been trained but without lora.
# doesn't seem widely used but eventually should be supported
if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
    print("Error: param modules_to_save is not supported")
    sys.exit(1)

with open(output_path, "wb") as fout:
    fout.truncate()

    write_file_header(fout, params)
    for k, v in model.items():
        if k.endswith("lora_A.weight"):
            if v.dtype != torch.float16 and v.dtype != torch.float32:
                v = v.float()
            # lora_A is written pre-transposed (see the commit notes above)
            v = v.T
        else:
            v = v.float()

        t = v.numpy()
        tname = translate_tensor_name(k)
        print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
        write_tensor_header(fout, tname, t.shape, t.dtype)
        t.tofile(fout)

print(f"Converted {input_json} and {input_model} to {output_path}")
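
For reference, a rough sketch of reading the resulting 'ggla' file back, mirroring the header layout written by write_file_header and write_tensor_header above. The ftype-to-dtype mapping used here (0 = float32, 1 = float16) is an assumption about the values of DATA_TYPE_TO_FTYPE in convert.py, not something visible in this diff.

import struct
import numpy as np

FTYPE_TO_NUMPY = {0: np.float32, 1: np.float16}  # assumed mapping, see note above

def read_lora_file(path: str):
    with open(path, "rb") as fin:
        assert fin.read(4) == b"ggla"[::-1]            # magic, as written above
        (version,) = struct.unpack("i", fin.read(4))   # file version (1 in this PR)
        r, lora_alpha = struct.unpack("ii", fin.read(8))
        tensors = {}
        while True:
            header = fin.read(12)
            if len(header) < 12:                       # end of file
                break
            n_dims, name_len, ftype = struct.unpack("iii", header)
            # shapes are stored reversed, so flip them back
            shape = struct.unpack("i" * n_dims, fin.read(4 * n_dims))[::-1]
            name = fin.read(name_len).decode("utf-8")
            fin.seek((fin.tell() + 31) & -32)          # skip the alignment padding
            data = np.fromfile(fin, dtype=FTYPE_TO_NUMPY[ftype],
                               count=int(np.prod(shape)))
            tensors[name] = data.reshape(shape)
    return r, lora_alpha, tensors

At run time the exported ggml-adapter-model.bin is applied on top of a llama.cpp model; judging by the commit messages, this PR adds a --lora option for that, plus --lora-base for pointing at a separate (preferably non-quantized) base model.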