vllm.renderers.hf

_PROCESSOR_CHAT_TEMPLATES module-attribute

_PROCESSOR_CHAT_TEMPLATES = dict[
    tuple[str, bool], str | None
]()

Used by _try_get_processor_chat_template to avoid calling cached_get_processor again when the processor previously failed to load.

This is needed because lru_cache does not cache a call that raises an exception.
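A minimal sketch of the pattern this attribute enables; the loader and helper below are hypothetical stand-ins, not the actual vLLM functions:

from functools import lru_cache

# Hypothetical illustration, not vLLM code: failures are memoized in a
# plain dict because lru_cache retries a call that raised an exception.
_CHAT_TEMPLATES: dict[str, str | None] = {}

@lru_cache
def _load_processor(name: str) -> object:
    # Stand-in for a processor load that may fail.
    raise RuntimeError(f"cannot load {name}")

def _try_get_chat_template(name: str) -> str | None:
    if name in _CHAT_TEMPLATES:  # hit even for past failures
        return _CHAT_TEMPLATES[name]
    try:
        template = getattr(_load_processor(name), "chat_template", None)
    except Exception:
        template = None  # remember the failure so we do not reload
    _CHAT_TEMPLATES[name] = template
    return template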

build_video_prompts_from_mm_data

build_video_prompts_from_mm_data(
    mm_data: MultiModalDataDict,
) -> list[str]

Build video prompts from vision_chunk data.

Collects prompts from video chunks and groups them by video_idx.

Parameters:

Name     Type                Description                                         Default
mm_data  MultiModalDataDict  Processed multimodal data with vision_chunk items   required

Returns:

Type       Description
list[str]  List of video prompts, one per video.

Source code in vllm/renderers/hf.py
def build_video_prompts_from_mm_data(
    mm_data: "MultiModalDataDict",
) -> list[str]:
    """Build video prompts from vision_chunk data.

    Collects prompts from video chunks and groups them by video_idx.

    Args:
        mm_data: Processed multimodal data with vision_chunk items

    Returns:
        List of video prompts, one per video.
    """
    vision_chunks = mm_data.get("vision_chunk")
    if vision_chunks is None:
        return []

    # Group chunks by video_idx
    video_prompts_dict: dict[int, list[str]] = defaultdict(list)

    for item in vision_chunks:
        # vision_chunk items are always dicts (VisionChunkImage/VisionChunkVideo)
        assert isinstance(item, dict)
        if item.get("type") == "video_chunk":
            video_idx = item.get("video_idx", 0)
            prompt = item.get("prompt", "")
            video_prompts_dict[video_idx].append(prompt)

    # Build prompts in video order
    video_prompts = [
        "".join(video_prompts_dict[video_idx])
        for video_idx in sorted(video_prompts_dict.keys())
    ]

    return video_prompts
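
A usage sketch; the chunk dicts below only mimic the video_chunk items the renderer produces, and the prompt strings are placeholders:

mm_data = {
    "vision_chunk": [
        # Chunks for two videos, deliberately interleaved.
        {"type": "video_chunk", "video_idx": 0, "prompt": "<vid0-a>"},
        {"type": "video_chunk", "video_idx": 1, "prompt": "<vid1-a>"},
        {"type": "video_chunk", "video_idx": 0, "prompt": "<vid0-b>"},
        # Non-video chunks are ignored.
        {"type": "image_chunk", "prompt": "<img>"},
    ]
}

# Chunks are grouped by video_idx and concatenated in index order.
assert build_video_prompts_from_mm_data(mm_data) == [
    "<vid0-a><vid0-b>",
    "<vid1-a>",
]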

rebuild_mm_uuids_from_mm_data

rebuild_mm_uuids_from_mm_data(
    mm_uuids: MultiModalUUIDDict,
    mm_data: MultiModalDataDict,
) -> MultiModalUUIDDict

Rebuild mm_uuids after vision_chunk processing.

When videos are split into chunks, the original UUIDs need to be updated to reflect the new UUIDs generated for each chunk.

Parameters:

Name      Type                Description                                         Default
mm_uuids  MultiModalUUIDDict  Original UUIDs dictionary                           required
mm_data   MultiModalDataDict  Processed multimodal data with vision_chunk items   required

Returns:

Type                Description
MultiModalUUIDDict  Updated UUIDs dictionary with chunk UUIDs

Source code in vllm/renderers/hf.py
def rebuild_mm_uuids_from_mm_data(
    mm_uuids: "MultiModalUUIDDict",
    mm_data: "MultiModalDataDict",
) -> "MultiModalUUIDDict":
    """Rebuild mm_uuids after vision_chunk processing.

    When videos are split into chunks, the original UUIDs need to be updated
    to reflect the new UUIDs generated for each chunk.

    Args:
        mm_uuids: Original UUIDs dictionary
        mm_data: Processed multimodal data with vision_chunk items

    Returns:
        Updated UUIDs dictionary with chunk UUIDs
    """
    vision_chunks = mm_data.get("vision_chunk")
    if vision_chunks is None:
        return mm_uuids

    assert all(isinstance(item, dict) for item in vision_chunks), (
        "Expected all vision_chunk items to be dicts"
    )
    vision_chunks = cast(list[dict[str, Any]], vision_chunks)
    vision_chunk_uuids = [
        uuid_val for item in vision_chunks if (uuid_val := item.get("uuid")) is not None
    ]

    if vision_chunk_uuids:
        mm_uuids = dict(mm_uuids)
        mm_uuids["vision_chunk"] = vision_chunk_uuids

    return mm_uuids
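
A usage sketch under the same assumptions about the chunk dict shape; the UUID strings and the "video" entry are placeholders:

mm_uuids = {"video": ["orig-video-uuid"]}
mm_data = {
    "vision_chunk": [
        {"type": "video_chunk", "video_idx": 0, "uuid": "chunk-0"},
        {"type": "video_chunk", "video_idx": 0, "uuid": "chunk-1"},
        # Chunks without a UUID are skipped.
        {"type": "video_chunk", "video_idx": 0, "uuid": None},
    ]
}

updated = rebuild_mm_uuids_from_mm_data(mm_uuids, mm_data)

# The input dict is copied rather than mutated, and the copy gains
# the per-chunk UUIDs under the "vision_chunk" key.
assert updated == {
    "video": ["orig-video-uuid"],
    "vision_chunk": ["chunk-0", "chunk-1"],
}
assert "vision_chunk" not in mm_uuids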