
Commit af49436

add debug info

hlin99 committed Oct 24, 2024
1 parent db392a6 commit af49436
Showing 1 changed file with 10 additions and 1 deletion.
vllm/model_executor/models/deepseek_v2.py (10 additions, 1 deletion)
@@ -83,9 +83,11 @@ def __init__(
         self.act_fn = SiluAndMul()
 
     def forward(self, x):
+        print(" DeepseekV2MLP +++")
         gate_up, _ = self.gate_up_proj(x)
         x = self.act_fn(gate_up)
         x, _ = self.down_proj(x)
+        print(" DeepseekV2MLP ---")
         return x


@@ -140,6 +142,7 @@ def __init__(
             )
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        print(" DeepseekV2MoE +++ ")
         num_tokens, hidden_dim = hidden_states.shape
         hidden_states = hidden_states.view(-1, hidden_dim)
         if self.n_shared_experts is not None:
@@ -154,7 +157,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         if self.tp_size > 1:
             final_hidden_states = tensor_model_parallel_all_reduce(
                 final_hidden_states)
-
+        print(" DeepseekV2MoE ---- ")
         return final_hidden_states.view(num_tokens, hidden_dim)


@@ -276,6 +279,7 @@ def forward(
         kv_cache: torch.Tensor,
         attn_metadata: AttentionMetadata,
     ) -> torch.Tensor:
+        print(" DeepseekV2Attention +++")
         if self.q_lora_rank is not None:
             q = self.q_a_proj(hidden_states)[0]
             q = self.q_a_layernorm(q)
@@ -327,6 +331,7 @@ def forward(
             -1, self.num_local_heads, 256)[..., :self.v_head_dim].reshape(
                 -1, self.num_local_heads * self.v_head_dim)
         output, _ = self.o_proj(attn_output)
+        print("DeepseekV2Attention ---")
         return output


@@ -395,6 +400,7 @@ def forward(
         attn_metadata: AttentionMetadata,
         residual: Optional[torch.Tensor],
     ) -> torch.Tensor:
+        print("DeepseekV2DecoderLayer +++")
         # Self Attention
         if residual is None:
             residual = hidden_states
@@ -415,6 +421,7 @@ def forward(
         hidden_states, residual = self.post_attention_layernorm(
             hidden_states, residual)
         hidden_states = self.mlp(hidden_states)
+        print("DeepseekV2DecoderLayer ---")
         return hidden_states, residual


@@ -467,6 +474,7 @@ def forward(
         attn_metadata: AttentionMetadata,
         intermediate_tensors: Optional[IntermediateTensors],
     ) -> Union[torch.Tensor, IntermediateTensors]:
+        print(" DeepseekV2Model +++")
         if get_pp_group().is_first_rank:
             hidden_states = self.embed_tokens(input_ids)
             residual = None
@@ -488,6 +496,7 @@
             })
 
         hidden_states, _ = self.norm(hidden_states, residual)
+        print("DeepseekV2Model --- ")
         return hidden_states


