vllm.model_executor.layers.fused_moe.all2all_utils

maybe_roundup_layer_hidden_size

maybe_roundup_layer_hidden_size(
    hidden_size: int,
    act_dtype: dtype,
    moe_parallel_config: FusedMoEParallelConfig,
) -> int

Given a layer's hidden size and its MoE configuration, round hidden_size up if the all2all backend requires it.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| hidden_size | int | Layer hidden size. | required |
| act_dtype | dtype | Data type of the layer activations. | required |
| moe_parallel_config | FusedMoEParallelConfig | Fused MoE parallelization strategy configuration. | required |
Returns:

The rounded-up hidden_size if rounding is required by the MoE configuration and the active all2all backend; the original hidden_size otherwise.

Source code in vllm/model_executor/layers/fused_moe/all2all_utils.py
def maybe_roundup_layer_hidden_size(
    hidden_size: int,
    act_dtype: torch.dtype,
    moe_parallel_config: FusedMoEParallelConfig,
) -> int:
    """
    Given a layer's hidden size and its MoE configuration, round up
    hidden_size if the all2all backend requires it.

    Args:
        hidden_size: Layer hidden size.
        act_dtype: Data type of the layer activations.
        moe_parallel_config: Fused MoE parallelization strategy configuration.

    Returns:
        The rounded-up hidden_size if rounding is required by the configs
        and all2all backend; the original hidden_size otherwise.
    """
    # The DeepEP high-throughput kernels constrain the hidden size based on
    # the activation dtype, so it is passed through here.
    if moe_parallel_config.use_deepep_ht_kernels:
        hidden_size = DeepEPHTPrepareAndFinalize.maybe_roundup_layer_hidden_size(
            hidden_size, act_dtype
        )

    # The DeepEP low-latency kernels constrain the hidden size independently
    # of the activation dtype.
    if moe_parallel_config.use_deepep_ll_kernels:
        hidden_size = DeepEPLLPrepareAndFinalize.maybe_roundup_layer_hidden_size(
            hidden_size
        )

    return hidden_size
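
To build intuition for what "rounding up" means here, below is a minimal sketch of the alignment pattern: pad hidden_size so that one row of activations occupies a whole number of fixed-size byte blocks. The helper name round_up_hidden_size and the 128-byte block size are illustrative assumptions for this example only; the actual constraints come from the DeepEP prepare/finalize classes shown above.

import torch


def round_up_hidden_size(
    hidden_size: int, act_dtype: torch.dtype, block_bytes: int = 128
) -> int:
    # Hypothetical sketch; block_bytes is an assumed alignment, not a
    # value any real all2all backend mandates.
    # Bytes per element for the activation dtype.
    itemsize = torch.empty((), dtype=act_dtype).element_size()
    # Round the row size in bytes up to the next multiple of block_bytes,
    # then convert back to an element count.
    row_bytes = hidden_size * itemsize
    aligned_bytes = (row_bytes + block_bytes - 1) // block_bytes * block_bytes
    return aligned_bytes // itemsize


round_up_hidden_size(4095, torch.bfloat16)  # -> 4096
round_up_hidden_size(4096, torch.bfloat16)  # -> 4096 (already aligned)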