Skip to content

vllm.model_executor.models.bee

BeeProcessingInfo

Bases: LlavaNextProcessingInfo

Source code in vllm/model_executor/models/bee.py
class BeeProcessingInfo(LlavaNextProcessingInfo):
    """Processing info for Bee, built on top of the LLaVA-NeXT variant.

    Config and processor lookups are forwarded to the processing context,
    and the unpadded-feature count honours the patch budget encoded in the
    config's ``vision_aspect_ratio`` field.
    """

    def get_hf_config(self):
        """Return the HF config as provided by the processing context."""
        return self.ctx.get_hf_config()

    def get_hf_processor(self, **kwargs: object):
        """Return the HF processor, forwarding ``kwargs`` to the context."""
        return self.ctx.get_hf_processor(**kwargs)

    def _get_num_unpadded_features(
        self,
        *,
        original_height: int,
        original_width: int,
        npatches: int,
        num_patch_height: int,
        num_patch_width: int,
    ) -> tuple[int, int]:
        """Count unpadded and newline features for one image.

        The patch grid is first trimmed along one axis so that it matches
        the aspect ratio of the original image. If the trimmed grid still
        exceeds the patch budget taken from ``vision_aspect_ratio`` (a
        string of the form ``anyres_max_<N>``, default ``anyres_max_9``) by
        more than 10% in linear scale, both sides are shrunk by that scale.

        Returns:
            ``(unpadded_features, newline_features)``: the grid area and
            the grid height after trimming and optional shrinking.
        """
        import math

        grid_h = npatches * num_patch_height
        grid_w = npatches * num_patch_width

        # Trim whichever axis carries padding relative to the source image.
        if original_width / original_height > grid_w / grid_h:
            scaled_h = int(
                round(original_height * (grid_w / original_width), 7)
            )
            grid_h -= 2 * ((grid_h - scaled_h) // 2)
        else:
            scaled_w = int(
                round(original_width * (grid_h / original_height), 7)
            )
            grid_w -= 2 * ((grid_w - scaled_w) // 2)

        # The patch budget is the numeric suffix of vision_aspect_ratio.
        config = self.get_hf_config()
        mode = getattr(config, "vision_aspect_ratio", "anyres_max_9")
        patch_budget = int(mode.replace("anyres_max_", ""))

        area = grid_h * grid_w
        scale = math.sqrt(area / (patch_budget * npatches**2))
        if scale <= 1.1:
            return (area, grid_h)

        # Over budget: shrink both sides by the same linear factor.
        shrunk_h = int(grid_h // scale)
        shrunk_w = int(grid_w // scale)
        return (shrunk_h * shrunk_w, shrunk_h)

_get_num_unpadded_features

_get_num_unpadded_features(
    *,
    original_height: int,
    original_width: int,
    npatches: int,
    num_patch_height: int,
    num_patch_width: int,
) -> tuple[int, int]

Override to use correct max_num_patches from vision_aspect_ratio.

Source code in vllm/model_executor/models/bee.py
def _get_num_unpadded_features(
    self,
    *,
    original_height: int,
    original_width: int,
    npatches: int,
    num_patch_height: int,
    num_patch_width: int,
) -> tuple[int, int]:
    """Count unpadded and newline features for one image.

    The patch grid is first trimmed along one axis so that it matches the
    aspect ratio of the original image. If the trimmed grid still exceeds
    the patch budget taken from ``vision_aspect_ratio`` (a string of the
    form ``anyres_max_<N>``, default ``anyres_max_9``) by more than 10% in
    linear scale, both sides are shrunk by that scale.

    Returns:
        ``(unpadded_features, newline_features)``: the grid area and the
        grid height after trimming and optional shrinking.
    """
    import math

    grid_h = npatches * num_patch_height
    grid_w = npatches * num_patch_width

    # Trim whichever axis carries padding relative to the source image.
    if original_width / original_height > grid_w / grid_h:
        scaled_h = int(
            round(original_height * (grid_w / original_width), 7)
        )
        grid_h -= 2 * ((grid_h - scaled_h) // 2)
    else:
        scaled_w = int(
            round(original_width * (grid_h / original_height), 7)
        )
        grid_w -= 2 * ((grid_w - scaled_w) // 2)

    # The patch budget is the numeric suffix of vision_aspect_ratio.
    config = self.get_hf_config()
    mode = getattr(config, "vision_aspect_ratio", "anyres_max_9")
    patch_budget = int(mode.replace("anyres_max_", ""))

    area = grid_h * grid_w
    scale = math.sqrt(area / (patch_budget * npatches**2))
    if scale <= 1.1:
        return (area, grid_h)

    # Over budget: shrink both sides by the same linear factor.
    shrunk_h = int(grid_h // scale)
    shrunk_w = int(grid_w // scale)
    return (shrunk_h * shrunk_w, shrunk_h)