vllm.entrypoints.cli ¶
Modules:
| Name | Description |
|---|---|
| benchmark | |
| collect_env | |
| main | The CLI entrypoints of vLLM |
| openai | |
| run_batch | |
| serve | |
| types | |
BenchmarkLatencySubcommand ¶
Bases: BenchmarkSubcommandBase
The latency subcommand for vllm bench.
Source code in vllm/entrypoints/cli/benchmark/latency.py
BenchmarkMMProcessorSubcommand ¶
Bases: BenchmarkSubcommandBase
The mm-processor subcommand for vllm bench.
Source code in vllm/entrypoints/cli/benchmark/mm_processor.py
BenchmarkServingSubcommand ¶
Bases: BenchmarkSubcommandBase
The serve subcommand for vllm bench.
Source code in vllm/entrypoints/cli/benchmark/serve.py
BenchmarkStartupSubcommand ¶
Bases: BenchmarkSubcommandBase
The startup subcommand for vllm bench.
Source code in vllm/entrypoints/cli/benchmark/startup.py
BenchmarkSweepSubcommand ¶
Bases: BenchmarkSubcommandBase
The sweep subcommand for vllm bench.
Source code in vllm/entrypoints/cli/benchmark/sweep.py
BenchmarkThroughputSubcommand ¶
Bases: BenchmarkSubcommandBase
The throughput subcommand for vllm bench.
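The subcommand classes above back the `vllm bench` CLI. A minimal usage sketch for three of them is shown below; the model name and flags are illustrative examples, not taken from this page, and available options can be listed with `--help`:

```shell
# Offline latency benchmark (BenchmarkLatencySubcommand)
vllm bench latency --model facebook/opt-125m

# Offline throughput benchmark (BenchmarkThroughputSubcommand)
vllm bench throughput --model facebook/opt-125m

# Online serving benchmark (BenchmarkServingSubcommand);
# assumes a server started separately with `vllm serve`
vllm bench serve --model facebook/opt-125m
```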