support InternVL2-2B
HarmonyHu committed Sep 16, 2024
1 parent a3f66f0 commit 9e90034
Showing 21 changed files with 2,499 additions and 140 deletions.
3 changes: 2 additions & 1 deletion README.md
@@ -52,6 +52,7 @@
|Yi-34B-chat |:white_check_mark:|:white_check_mark:| |[LINK](https://huggingface.co/01-ai/Yi-34B-Chat) |
|Qwen-VL-Chat |:white_check_mark:|:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen-VL-Chat) |
|InternVL2-4B |:white_check_mark:|:white_check_mark:| |[LINK](https://huggingface.co/OpenGVLab/InternVL2-4B) |
|InternVL2-2B |:white_check_mark:|:white_check_mark:| |[LINK](https://huggingface.co/OpenGVLab/InternVL2-2B) |


If you want to learn about the conversion details and source code, see the [models](./models) subdirectory of this project for the deployment details of each model.
@@ -86,7 +87,7 @@ git clone https://github.com/sophgo/LLM-TPU.git
| Qwen1.5-1.8B | ./run.sh --model qwen1.5-1.8b --arch soc | ./run.sh --model qwen1.5-1.8b --arch pcie |
| LWM-Text-Chat | ./run.sh --model lwm-text-chat --arch soc | ./run.sh --model lwm-text-chat --arch pcie |
| WizardCoder-15B | ./run.sh --model wizardcoder-15b --arch soc | ./run.sh --model wizardcoder-15b --arch pcie |

| InternVL2-4B | ./run.sh --model internvl2-4b --arch soc | ./run.sh --model internvl2-4b --arch pcie |

## Advanced Features
Notes on the advanced features:
14 changes: 12 additions & 2 deletions models/InternVL2/README.md
@@ -83,10 +83,20 @@ cd build && cmake .. && make && cp *cpython* .. && cd ..
* python demo

```
python3 pipeline.py --model_path internvl2-4b_bm1684x_int4.bmodel --tokenizer ../support/token_config/ --devid 0
python3 pipeline.py --model_path internvl2-4b_bm1684x_int4.bmodel --tokenizer ../support/token_config_4b --devid 0
```
`--model_path` is the actual path where the bmodel is stored; `--tokenizer` is the actual path where the tokenizer configuration is stored.

* Example output

![](../../assets/internvl2-4b.png)

## FAQ

#### Is InternVL2-2B supported?

Yes, it is supported, and the steps are essentially the same (see the consolidated sketch after this list):
1. Replace the corresponding files in `InternVL2-2B` with the ones under `files/InternVL2-2B`;
2. Run `export_onnx.py` to export the ONNX models;
3. Run `./compile.sh --name internvl2-2b` to generate the model `internvl2-2b_bm1684x_int4.bmodel`;
4. Running the demo is the same, except that `token_config_2b` must be specified: `python3 pipeline.py --model_path internvl2-2b_bm1684x_int4.bmodel --tokenizer ../support/token_config_2b --devid 0`
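
For reference, the four steps can be chained as below. This is a minimal sketch: the placeholder path to the downloaded `InternVL2-2B` checkpoint, the working directories, and the `--model_path` flag of `export_onnx.py` are assumptions inferred from the 4B flow and should be adapted to your local checkout.

```
# minimal sketch; adjust the assumed paths to your local layout
cp ./compile/files/InternVL2-2B/* /path/to/InternVL2-2B/                 # 1. overwrite the original model files
python3 ./compile/export_onnx.py --model_path /path/to/InternVL2-2B     # 2. export the ONNX models
cd compile && ./compile.sh --name internvl2-2b && cd ..                  # 3. build internvl2-2b_bm1684x_int4.bmodel
python3 pipeline.py --model_path internvl2-2b_bm1684x_int4.bmodel \
        --tokenizer ../support/token_config_2b --devid 0                 # 4. run the demo
```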
2 changes: 1 addition & 1 deletion models/InternVL2/compile/compile.sh
@@ -54,7 +54,7 @@ fi

if [ x$mode == x"int8" ]; then
quantize_args="--quantize W8BF16"
elif [ x$mode == x"f16" ]; then
elif [ x$mode == x"bf16" ]; then
quantize_args="--quantize BF16"
elif [ x$mode == x"int4" ]; then
quantize_args="--quantize W4BF16 --q_group_size 64"
95 changes: 25 additions & 70 deletions models/InternVL2/compile/export_onnx.py
@@ -29,6 +29,7 @@
args = parser.parse_args()

model_path = args.model_path
is_4B = "InternVL2-4B" in model_path
folder = f"./tmp/onnx"

origin_model = AutoModelForCausalLM.from_pretrained(
@@ -48,7 +49,7 @@
HEAD_DIM = HIDDEN_SIZE // NUM_ATTENTION_HEADS
VOCAB_SIZE = config.llm_config.vocab_size
DOWNSAMPLE_RATIO = config.downsample_ratio
EOS_TOKEN_ID = config.llm_config.eos_token_id
ID_EOS = config.llm_config.eos_token_id
print(f'Layers: {NUM_LAYERS}\nHidden size: {HIDDEN_SIZE}\n')

vit = origin_model.vision_model
@@ -63,9 +64,10 @@ class Embedding(torch.nn.Module):

def __init__(self):
super().__init__()
self.embed = transformer.get_input_embeddings()

def forward(self, input_ids):
hidden_states = transformer.embed_tokens(input_ids)
hidden_states = self.embed(input_ids)
return hidden_states


@@ -75,13 +77,18 @@ def __init__(self, layer_id):
super().__init__()
self.layer_id = layer_id
self.layer = layers[layer_id]
self.rotary_emb = self.layer.self_attn.rotary_emb

position_ids = torch.tensor(
[range(SEQ_LENGTH)], dtype=torch.long).cuda()
value_states = torch.randn(
(1, SEQ_LENGTH, config.llm_config.num_key_value_heads, HEAD_DIM)).bfloat16().cuda()
self.cos, self.sin = self.rotary_emb(
value_states, position_ids, SEQ_LENGTH)
if is_4B:
self.rotary_emb = self.layer.self_attn.rotary_emb
self.cos, self.sin = self.rotary_emb(
value_states, position_ids, SEQ_LENGTH)
else:
self.rotary_emb = self.layer.attention.rotary_emb
self.cos, self.sin = self.rotary_emb(value_states, SEQ_LENGTH)
self.cos = self.cos.view(SEQ_LENGTH, HEAD_DIM)
self.sin = self.sin.view(SEQ_LENGTH, HEAD_DIM)

@@ -105,13 +112,17 @@ def __init__(self, layer_id):
super().__init__()
self.layer_id = layer_id
self.layer = layers[layer_id]
self.rotary_emb = self.layer.self_attn.rotary_emb
position_ids = torch.tensor(
[range(SEQ_LENGTH)], dtype=torch.long).cuda()
value_states = torch.randn(
(1, SEQ_LENGTH, config.llm_config.num_key_value_heads, HEAD_DIM)).bfloat16().cuda()
self.cos, self.sin = self.rotary_emb(
value_states, position_ids, SEQ_LENGTH)
if is_4B:
self.rotary_emb = self.layer.self_attn.rotary_emb
self.cos, self.sin = self.rotary_emb(
value_states, position_ids, SEQ_LENGTH)
else:
self.rotary_emb = self.layer.attention.rotary_emb
self.cos, self.sin = self.rotary_emb(value_states, SEQ_LENGTH)
self.cos = self.cos.view(SEQ_LENGTH, HEAD_DIM)
self.sin = self.sin.view(SEQ_LENGTH, HEAD_DIM)

@@ -134,10 +145,11 @@ class LmHead(torch.nn.Module):

def __init__(self):
super().__init__()
self.lm_head = origin_model.language_model.get_output_embeddings()

def forward(self, hidden_states):
hidden_states = transformer.norm(hidden_states)
m_logits = origin_model.language_model.lm_head(hidden_states)
m_logits = self.lm_head(hidden_states)
_, token = torch.topk(m_logits.float(), 1)
return token

@@ -251,68 +263,10 @@ def build_transform(input_size):
return transform


def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
best_ratio_diff = float('inf')
best_ratio = (1, 1)
area = width * height
for ratio in target_ratios:
target_aspect_ratio = ratio[0] / ratio[1]
ratio_diff = abs(aspect_ratio - target_aspect_ratio)
if ratio_diff < best_ratio_diff:
best_ratio_diff = ratio_diff
best_ratio = ratio
elif ratio_diff == best_ratio_diff:
if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
best_ratio = ratio
return best_ratio


def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False):
orig_width, orig_height = image.size
aspect_ratio = orig_width / orig_height

# calculate the existing image aspect ratio
target_ratios = set(
(i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if
i * j <= max_num and i * j >= min_num)
target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])

# find the closest aspect ratio to the target
target_aspect_ratio = find_closest_aspect_ratio(
aspect_ratio, target_ratios, orig_width, orig_height, image_size)

# calculate the target width and height
target_width = image_size * target_aspect_ratio[0]
target_height = image_size * target_aspect_ratio[1]
blocks = target_aspect_ratio[0] * target_aspect_ratio[1]

# resize the image
resized_img = image.resize((target_width, target_height))
processed_images = []
for i in range(blocks):
box = (
(i % (target_width // image_size)) * image_size,
(i // (target_width // image_size)) * image_size,
((i % (target_width // image_size)) + 1) * image_size,
((i // (target_width // image_size)) + 1) * image_size
)
# split the image
split_img = resized_img.crop(box)
processed_images.append(split_img)
assert len(processed_images) == blocks
if use_thumbnail and len(processed_images) != 1:
thumbnail_img = image.resize((image_size, image_size))
processed_images.append(thumbnail_img)
return processed_images


def load_image(image_file, input_size=448, max_num=12):
image = Image.open(image_file).convert('RGB')
transform = build_transform(input_size=input_size)
images = dynamic_preprocess(
image, image_size=input_size, use_thumbnail=True, max_num=max_num)
pixel_values = [transform(image) for image in images]
pixel_values = torch.stack(pixel_values)
pixel_values = transform(image)
return pixel_values


@@ -332,7 +286,8 @@ def test_net_with_mask():
pixel_values = load_image(jpg, max_num=1).to(
torch.bfloat16).cuda() # [1, 3, 448, 448]
vit_embeds = vit_infer(pixel_values) # [1, 256, 3072]

ID_IM_END = tokenizer.convert_tokens_to_ids("<|im_end|>")
ID_END = tokenizer.convert_tokens_to_ids("<|end|>")
token_len = len(ids)
ids = ids + (SEQ_LENGTH - token_len) * [0]
input_ids = torch.tensor(ids).view(SEQ_LENGTH).cuda()
@@ -362,7 +317,7 @@ def test_net_with_mask():
lm = LmHead()
token = lm(out.bfloat16()).view(1)
out_ids = [int(token)]
while int(token) < EOS_TOKEN_ID and token_len < SEQ_LENGTH:
while int(token) not in [ID_EOS, ID_IM_END, ID_END] and token_len < SEQ_LENGTH:
token_len += 1
input_ids = torch.tensor([token]).cuda()
out = embed(input_ids).view(1, 1, HIDDEN_SIZE)
143 changes: 143 additions & 0 deletions models/InternVL2/compile/files/InternVL2-2B/config.json
@@ -0,0 +1,143 @@
{
"_commit_hash": null,
"architectures": [
"InternVLChatModel"
],
"auto_map": {
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
},
"downsample_ratio": 0.5,
"dynamic_image_size": true,
"force_image_size": 448,
"llm_config": {
"_name_or_path": "internlm/internlm2-chat-1_8b",
"add_cross_attention": false,
"architectures": [
"InternLM2ForCausalLM"
],
"attn_implementation": "flash_attention_2",
"auto_map": {
"AutoConfig": "configuration_internlm2.InternLM2Config",
"AutoModel": "modeling_internlm2.InternLM2ForCausalLM",
"AutoModelForCausalLM": "modeling_internlm2.InternLM2ForCausalLM"
},
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bias": false,
"bos_token_id": 1,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": 2,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hidden_act": "silu",
"hidden_size": 2048,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"initializer_range": 0.02,
"intermediate_size": 8192,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"max_position_embeddings": 512,
"min_length": 0,
"model_type": "internlm2",
"no_repeat_ngram_size": 0,
"num_attention_heads": 16,
"num_beam_groups": 1,
"num_beams": 1,
"num_hidden_layers": 24,
"num_key_value_heads": 8,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": 2,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"rms_norm_eps": 1e-05,
"rope_scaling": {
"factor": 2.0,
"type": "dynamic"
},
"rope_theta": 1000000,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": false,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": "bfloat16",
"torchscript": false,
"transformers_version": "4.37.2",
"typical_p": 1.0,
"use_bfloat16": true,
"use_cache": true,
"vocab_size": 92553
},
"max_dynamic_patch": 12,
"min_dynamic_patch": 1,
"model_type": "internvl_chat",
"ps_version": "v2",
"select_layer": -1,
"template": "internlm2-chat",
"torch_dtype": "bfloat16",
"use_backbone_lora": 0,
"use_llm_lora": 0,
"use_thumbnail": true,
"vision_config": {
"architectures": [
"InternVisionModel"
],
"attention_dropout": 0.0,
"drop_path_rate": 0.0,
"dropout": 0.0,
"hidden_act": "gelu",
"hidden_size": 1024,
"image_size": 448,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 4096,
"layer_norm_eps": 1e-06,
"model_type": "intern_vit_6b",
"norm_type": "layer_norm",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 24,
"output_attentions": false,
"output_hidden_states": false,
"patch_size": 14,
"qk_normalization": false,
"qkv_bias": true,
"return_dict": true,
"torch_dtype": "bfloat16",
"transformers_version": "4.37.2",
"use_bfloat16": true,
"use_flash_attn": true
}
}