vllm.parser ¶

Modules:

Name	Description
`abstract_parser`
`parser_manager`

DelegatingParser ¶

Bases: Parser

A Parser implementation that delegates to separate ReasoningParser and ToolParser instances.

This is the recommended base class for creating model-specific parsers that combine existing reasoning and tool parser implementations. Subclasses should set self._reasoning_parser and self._tool_parser in their __init__ method.

If either parser is None, the corresponding methods will return default values (no reasoning extraction, no tool calls).

Source code in vllm/parser/abstract_parser.py

class DelegatingParser(Parser):
    """
    A Parser implementation that delegates to separate ReasoningParser and
    ToolParser instances.

    This is the recommended base class for creating model-specific parsers
    that combine existing reasoning and tool parser implementations.
    Subclasses should set `self._reasoning_parser` and `self._tool_parser`
    in their `__init__` method.

    If either parser is None, the corresponding methods will return default
    values (no reasoning extraction, no tool calls).
    """

    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
    ) -> tuple[str | None, str | None]:
        """Split *model_output* into a ``(reasoning, content)`` pair.

        The work is delegated to the configured reasoning parser. When no
        reasoning parser is set, nothing is extracted and the whole output
        is returned as content.
        """
        reasoning_parser = self._reasoning_parser
        if reasoning_parser is not None:
            return reasoning_parser.extract_reasoning(model_output, request)
        return None, model_output

    def extract_response_outputs(
        self,
        *,
        model_output: str,
        model_output_token_ids: Sequence[int],
        request: ResponsesRequest,
        enable_auto_tools: bool = False,
        tool_call_id_type: str = "random",
        logprobs: list[Logprob] | None = None,
    ) -> list[ResponseOutputItem]:
        """Convert raw model output into a list of response output items.

        Reasoning is split off first, then tool calls are parsed from what
        remains. Items are appended in a fixed order: the reasoning item
        (if any reasoning was found), the assistant message (if any content
        is left), then one function-call item per parsed tool call.

        ``model_output_token_ids`` is accepted but not read by this
        implementation.
        """
        reasoning, remaining = self.extract_reasoning(model_output, request)
        parsed_calls, remaining = self._parse_tool_calls(
            request=request,
            content=remaining,
            enable_auto_tools=enable_auto_tools,
        )

        items: list[ResponseOutputItem] = []

        if reasoning:
            reasoning_id = f"rs_{random_uuid()}"
            reasoning_text = ResponseReasoningTextContent(
                text=reasoning, type="reasoning_text"
            )
            items.append(
                ResponseReasoningItem(
                    id=reasoning_id,
                    summary=[],
                    type="reasoning",
                    content=[reasoning_text],
                    status=None,  # NOTE: Only the last output item has status.
                )
            )

        if remaining:
            text_part = ResponseOutputText(
                text=remaining,
                annotations=[],
                type="output_text",
                logprobs=logprobs,
            )
            items.append(
                ResponseOutputMessage(
                    id=f"msg_{random_uuid()}",
                    content=[text_part],
                    role="assistant",
                    status="completed",
                    type="message",
                )
            )

        # Tool-call history is not tracked in the Responses API yet, so the
        # index handed to make_tool_call_id is a plain counter that restarts
        # at 0 for every request.
        for call_idx, call in enumerate(parsed_calls or ()):
            item_id = f"fc_{random_uuid()}"
            # Keep the parser-provided ID when there is one; otherwise
            # generate a fresh one.
            call_id = call.id or make_tool_call_id(
                id_type=tool_call_id_type,
                func_name=call.name,
                idx=call_idx,
            )
            items.append(
                ResponseFunctionToolCall(
                    id=item_id,
                    call_id=call_id,
                    type="function_call",
                    status="completed",
                    name=call.name,
                    arguments=call.arguments,
                )
            )

        return items

    def _get_function_name(
        self, request: ChatCompletionRequest | ResponsesRequest
    ) -> str:
        if request.tool_choice and isinstance(request.tool_choice, ToolChoiceFunction):
            return request.tool_choice.name
        if request.tool_choice and isinstance(
            request.tool_choice, ChatCompletionNamedToolChoiceParam
        ):
            return request.tool_choice.function.name
        raise ValueError("Invalid tool_choice for function name extraction.")

    def _parse_tool_calls(
        self,
        request: ResponsesRequest,
        content: str | None,
        enable_auto_tools: bool,
    ) -> tuple[list[FunctionCall], str | None]:
        """
        TODO(qandrew): merge _parse_tool_calls_from_content
        for ChatCompletions into this function
        Parse tool calls from content based on request tool_choice settings.

        Returns:
            A tuple of (function_calls, remaining_content) if tool calls
            were parsed
        """
        function_calls: list[FunctionCall] = []

        if request.tool_choice and isinstance(
            request.tool_choice,
            (ToolChoiceFunction, ChatCompletionNamedToolChoiceParam),
        ):
            # Forced Function Call
            if content is None:
                return [], None
            function_calls.append(
                FunctionCall(name=self._get_function_name(request), arguments=content)
            )
            return function_calls, None  # Clear content since tool is called.

        if request.tool_choice == "required":
            # Required tool calls - parse JSON
            tool_calls = []
            with contextlib.suppress(ValidationError):
                content = content or ""
                tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(
                    content
                )
            for tool_call in tool_calls:
                function_calls.append(
                    FunctionCall(
                        name=tool_call.name,
                        arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
                    )
                )
            return function_calls, None  # Clear content since tool is called.

        if (
            self._tool_parser is not None
            and enable_auto_tools
            and (request.tool_choice == "auto" or request.tool_choice is None)
        ):
            # Automatic Tool Call Parsing
            tool_call_info = self._tool_parser.extract_tool_calls(
                content if content is not None else "",
                request=request,  # type: ignore
            )
            if tool_call_info is not None and tool_call_info.tools_called:
                function_calls.extend(
                    FunctionCall(
                        id=tool_call.id,
                        name=tool_call.function.name,
                        arguments=tool_call.function.arguments,
                    )
                    for tool_call in tool_call_info.tool_calls
                )
                remaining_content = tool_call_info.content
                if remaining_content and remaining_content.strip() == "":
                    remaining_content = None
                return function_calls, remaining_content

        # No tool calls
        return [], content

    def _extract_tool_calls(
        self,
        content: str | None,
        request: ChatCompletionRequest | ResponsesRequest,
        enable_auto_tools: bool = False,
    ) -> tuple[list[FunctionCall] | None, str | None]:
        """Extract tool calls from *content* according to ``request.tool_choice``.

        Exactly one of three strategies is applied:

        * Named tool choice (parser supports it, Mistral grammar path not in
          use): the whole content becomes the arguments of the forced function.
        * ``"required"`` (same preconditions): the content is validated as a
          JSON list of function definitions; a validation error is suppressed
          and yields no calls.
        * Automatic parsing via the tool parser: used for ``"auto"``/``None``
          when auto tools are enabled, as a fallback for named/required when
          the parser does not support them, and whenever the Mistral grammar
          path is in use.

        Returns:
            A tuple of (tool_calls, remaining_content). ``tool_calls`` is
            ``None`` only when the automatic strategy ran and the tool parser
            reported no tool calls; it is an empty list when there is no tool
            parser, when a named choice has no content, or when no strategy
            applied.
        """
        tool_parser = self._tool_parser
        if tool_parser is None:
            return [], content

        # When the Mistral grammar factory injected structured outputs,
        # let the parser handle the output.
        use_mistral_tool_parser = (
            is_mistral_tool_parser(type(tool_parser))
            and isinstance(request, ChatCompletionRequest)
            and request._grammar_from_tool_parser
        )

        supports_required_and_named = tool_parser.supports_required_and_named
        is_named_tool_choice = request.tool_choice and isinstance(
            request.tool_choice,
            (ToolChoiceFunction, ChatCompletionNamedToolChoiceParam),
        )
        is_required_tool_choice = request.tool_choice == "required"
        # "Auto" also covers named/required choices that the parser cannot
        # handle itself, but only while auto tools are enabled.
        is_auto_tool_choice = enable_auto_tools and (
            request.tool_choice == "auto"
            or request.tool_choice is None
            or (
                not supports_required_and_named
                and (is_named_tool_choice or is_required_tool_choice)
            )
        )

        tool_calls = list[FunctionCall]()
        if (
            is_named_tool_choice
            and supports_required_and_named
            and not use_mistral_tool_parser
        ):
            # Named tool choice: the content is the argument payload.
            if content is None:
                return [], None
            tool_calls.append(
                FunctionCall(
                    name=self._get_function_name(request),
                    arguments=content,
                )
            )
            content = None
        elif (
            is_required_tool_choice
            and supports_required_and_named
            and not use_mistral_tool_parser
        ):
            # "required" with standard JSON-based parsing
            parsed_calls = []
            # Invalid JSON is tolerated: parsed_calls stays empty and the
            # content is still cleared below.
            with contextlib.suppress(ValidationError):
                content = content or ""
                parsed_calls = TypeAdapter(list[FunctionDefinition]).validate_json(
                    content
                )
            for tc in parsed_calls:
                tool_calls.append(
                    FunctionCall(
                        name=tc.name,
                        arguments=json.dumps(tc.parameters, ensure_ascii=False),
                    )
                )
            content = None
        elif is_auto_tool_choice or use_mistral_tool_parser:
            # Automatic Tool Call Parsing (also used as fallback for
            # required/named when supports_required_and_named=False)
            tool_call_info = tool_parser.extract_tool_calls(
                content if content is not None else "",
                request=request,  # type: ignore
            )
            if tool_call_info is not None and tool_call_info.tools_called:
                tool_calls.extend(
                    FunctionCall(
                        id=tc.id,
                        name=tc.function.name,
                        arguments=tc.function.arguments,
                    )
                    for tc in tool_call_info.tool_calls
                )
                content = tool_call_info.content
                # Whitespace-only leftovers are reported as "no content".
                if content and content.strip() == "":
                    content = None
            else:
                # No tool calls.
                return None, content

        return tool_calls, content

    def adjust_request(
        self, request: ChatCompletionRequest | ResponsesRequest
    ) -> ChatCompletionRequest | ResponsesRequest:
        """Let each configured parser adjust *request* and return the result.

        The reasoning parser runs first and the tool parser second; a parser
        that is not set is skipped.
        """
        for delegate in (self._reasoning_parser, self._tool_parser):
            if delegate is not None:
                request = delegate.adjust_request(request)
        return request

    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        """Run streaming reasoning extraction for one delta.

        Delegates to the reasoning parser when one is configured; otherwise
        the delta text is passed through unchanged as content.
        """
        reasoning_parser = self._reasoning_parser
        if reasoning_parser is not None:
            return reasoning_parser.extract_reasoning_streaming(
                previous_text,
                current_text,
                delta_text,
                previous_token_ids,
                current_token_ids,
                delta_token_ids,
            )
        return DeltaMessage(content=delta_text)

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Extract tool calls from a complete model output.

        Delegates to the tool parser when one is configured; otherwise
        reports that no tools were called and returns the output as content.
        """
        tool_parser = self._tool_parser
        if tool_parser is not None:
            return tool_parser.extract_tool_calls(model_output, request)
        return ExtractedToolCallInformation(
            tools_called=False, tool_calls=[], content=model_output
        )

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """Run streaming tool-call extraction for one delta.

        Delegates to the tool parser when one is configured; returns
        ``None`` otherwise.
        """
        tool_parser = self._tool_parser
        if tool_parser is not None:
            return tool_parser.extract_tool_calls_streaming(
                previous_text,
                current_text,
                delta_text,
                previous_token_ids,
                current_token_ids,
                delta_token_ids,
                request,
            )
        return None

    def _extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest | ResponsesRequest,
        # The following parameters are used for "required" tool choice parsing and are
        # tracked in StreamState for streaming parsing.
        tool_call_idx: int | None = None,
        tool_call_id_type: str = "random",
        function_name_returned: bool = False,
    ) -> tuple[DeltaMessage | None, bool]:
        if request.tool_choice == "none":
            return (DeltaMessage(content=delta_text) if delta_text else None), False

        assert self._tool_parser is not None
        supports_required_and_named = self._tool_parser.supports_required_and_named
        if (
            supports_required_and_named
            and request.tool_choice
            and isinstance(
                request.tool_choice,
                (ToolChoiceFunction, ChatCompletionNamedToolChoiceParam),
            )
        ):
            delta_message, function_name_returned = extract_named_tool_call_streaming(
                delta_text=delta_text,
                function_name=self._get_function_name(request),
                function_name_returned=function_name_returned,
                tool_call_idx=tool_call_idx,
                tool_call_id_type=tool_call_id_type,
                tokenizer=self.model_tokenizer,
            )
            return delta_message, function_name_returned

        if supports_required_and_named and request.tool_choice == "required":
            delta_message, function_name_returned = (
                extract_required_tool_call_streaming(
                    previous_text=previous_text,
                    current_text=current_text,
                    delta_text=delta_text,
                    function_name_returned=function_name_returned,
                    tool_call_idx=tool_call_idx,
                    tool_call_id_type=tool_call_id_type,
                )
            )
            return delta_message, function_name_returned
        return self.extract_tool_calls_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
            request,  # type: ignore[arg-type]
        ), False

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """Ask the reasoning parser whether reasoning has ended in *input_ids*.

        Returns ``False`` when no reasoning parser is configured.
        """
        reasoning_parser = self._reasoning_parser
        return reasoning_parser is not None and reasoning_parser.is_reasoning_end(
            input_ids
        )

    def is_reasoning_end_streaming(
        self, input_ids: list[int], delta_ids: list[int]
    ) -> bool:
        """Streaming variant of the reasoning-end check.

        Delegates to the reasoning parser; returns ``False`` when no
        reasoning parser is configured.
        """
        reasoning_parser = self._reasoning_parser
        return (
            False
            if reasoning_parser is None
            else reasoning_parser.is_reasoning_end_streaming(input_ids, delta_ids)
        )

    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """Return the content token IDs found in *input_ids*.

        Delegates to the reasoning parser; without one, *input_ids* is
        returned as-is.
        """
        reasoning_parser = self._reasoning_parser
        if reasoning_parser is not None:
            return reasoning_parser.extract_content_ids(input_ids)
        return input_ids

    def _in_reasoning_phase(self, state: StreamState) -> bool:
        if self._reasoning_parser is None:
            return False
        return not state.reasoning_ended

    def _in_tool_call_phase(self, state: StreamState) -> bool:
        if self._tool_parser is None:
            return False
        return state.reasoning_ended

    def _append_unstreamed_tool_args(
        self,
        delta_message: DeltaMessage | None,
    ) -> None:
        """Append parsed-but-unstreamed tool-call arguments to *delta_message*."""
        if (
            self._tool_parser is not None
            and delta_message
            and delta_message.tool_calls
            and (last_tc := delta_message.tool_calls[-1]).function
        ):
            last_tc.function.arguments = (
                last_tc.function.arguments or ""
            ) + self._tool_parser.get_remaining_unstreamed_args()

    def parse(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
        enable_auto_tools: bool = False,
    ) -> tuple[str | None, str | None, list[FunctionCall] | None]:
        """Parse a complete model output in one pass.

        Reasoning is extracted first; tool calls are then extracted from the
        content that is left.

        Returns:
            A tuple of (reasoning, content, tool_calls).
        """
        reasoning, remaining = self.extract_reasoning(model_output, request)
        parsed_calls, remaining = self._extract_tool_calls(
            content=remaining,
            request=request,
            enable_auto_tools=enable_auto_tools,
        )
        return reasoning, remaining, parsed_calls

    def parse_delta(
        self,
        delta_text: str,
        delta_token_ids: list[int],
        request: ChatCompletionRequest | ResponsesRequest,
        prompt_token_ids: list[int] | None = None,
        *,
        finished: bool,
    ) -> DeltaMessage | None:
        """Parse one streamed delta, updating the parser's stream state.

        The delta goes through reasoning extraction while the stream is in
        its reasoning phase, then through tool-call extraction once it is in
        its tool-call phase. If neither phase is active and nothing was
        produced, the delta text is passed through as content.

        Args:
            delta_text: Text generated since the previous call.
            delta_token_ids: Token IDs generated since the previous call.
            request: The request being served; forwarded to tool-call
                extraction.
            prompt_token_ids: Prompt token IDs. The first time they are
                provided they are used to decide whether reasoning has
                already ended before any output is generated.
            finished: When true, any parsed-but-unstreamed tool-call
                arguments are appended to the returned message.

        Returns:
            The delta message to emit, or ``None`` if there is nothing to
            emit for this delta.
        """
        state = self._stream_state

        # One-time check, run the first time prompt token IDs are available:
        # reasoning counts as ended if there is no reasoning parser or the
        # prompt itself already ends the reasoning section.
        if not state.prompt_reasoning_checked and prompt_token_ids is not None:
            state.prompt_reasoning_checked = True
            if self._reasoning_parser is None or self.is_reasoning_end(
                prompt_token_ids
            ):
                state.reasoning_ended = True

        current_text = state.previous_text + delta_text
        current_token_ids = state.previous_token_ids + delta_token_ids
        delta_message: DeltaMessage | None = None

        # Reasoning extraction
        if self._in_reasoning_phase(state):
            delta_message = self.extract_reasoning_streaming(
                previous_text=state.previous_text,
                current_text=current_text,
                delta_text=delta_text,
                previous_token_ids=state.previous_token_ids,
                current_token_ids=current_token_ids,
                delta_token_ids=delta_token_ids,
            )
            if self.is_reasoning_end_streaming(current_token_ids, delta_token_ids):
                # Reasoning ended inside this delta: from here on only the
                # content part of the delta counts as current text/tokens.
                state.reasoning_ended = True
                current_token_ids = self.extract_content_ids(delta_token_ids)
                current_text = (
                    delta_message.content
                    if delta_message and delta_message.content
                    else ""
                )
                delta_text = current_text
                delta_token_ids = current_token_ids

        # Tool call extraction
        if self._in_tool_call_phase(state):
            if not state.tool_call_text_started:
                # First delta of the tool-call phase: reset the history so
                # the tool parser sees the current text as its first delta.
                state.tool_call_text_started = True
                state.previous_text = ""
                state.previous_token_ids = []
                delta_text = current_text
                delta_token_ids = current_token_ids

            # A boundary delta may carry both reasoning and tool call,
            # save it before the tool parser overwrites delta_message.
            reasoning = delta_message.reasoning if delta_message else None
            delta_message, state.function_name_returned = (
                self._extract_tool_calls_streaming(
                    previous_text=state.previous_text,
                    current_text=current_text,
                    delta_text=delta_text,
                    previous_token_ids=state.previous_token_ids,
                    current_token_ids=current_token_ids,
                    delta_token_ids=delta_token_ids,
                    request=request,  # type: ignore[arg-type]
                    tool_call_idx=state.history_tool_call_cnt,
                    tool_call_id_type=state.tool_call_id_type,
                    function_name_returned=state.function_name_returned,
                )
            )
            if reasoning:
                if not delta_message:
                    delta_message = DeltaMessage()
                delta_message.reasoning = reasoning

            # Count the delta as a new tool call when its first tool-call
            # entry carries an ID.
            if (
                delta_message
                and delta_message.tool_calls
                and delta_message.tool_calls[0].id is not None
            ):
                state.history_tool_call_cnt += 1

        # No phase active: pass through as content
        if (
            delta_message is None
            and not self._in_reasoning_phase(state)
            and not self._in_tool_call_phase(state)
        ):
            delta_message = DeltaMessage(content=delta_text)

        state.previous_text = current_text
        state.previous_token_ids = current_token_ids

        if finished:
            self._append_unstreamed_tool_args(delta_message)

        return delta_message

_append_unstreamed_tool_args ¶

_append_unstreamed_tool_args(
    delta_message: DeltaMessage | None,
) -> None

Append parsed-but-unstreamed tool-call arguments to delta_message.

Source code in vllm/parser/abstract_parser.py

def _append_unstreamed_tool_args(
    self,
    delta_message: DeltaMessage | None,
) -> None:
    """Append parsed-but-unstreamed tool-call arguments to *delta_message*."""
    if (
        self._tool_parser is not None
        and delta_message
        and delta_message.tool_calls
        and (last_tc := delta_message.tool_calls[-1]).function
    ):
        last_tc.function.arguments = (
            last_tc.function.arguments or ""
        ) + self._tool_parser.get_remaining_unstreamed_args()

_parse_tool_calls ¶

_parse_tool_calls(
    request: ResponsesRequest,
    content: str | None,
    enable_auto_tools: bool,
) -> tuple[list[FunctionCall], str | None]

Parse tool calls from content based on the request's `tool_choice` settings.

TODO(qandrew): merge `_parse_tool_calls_from_content` for ChatCompletions into this function.

Returns:

Type	Description
`list[FunctionCall]`	The parsed function calls; empty if no tool calls were parsed.
`str \| None`	The remaining content; `None` when the content was consumed by a tool call or was absent.

Source code in vllm/parser/abstract_parser.py

def _parse_tool_calls(
    self,
    request: ResponsesRequest,
    content: str | None,
    enable_auto_tools: bool,
) -> tuple[list[FunctionCall], str | None]:
    """
    TODO(qandrew): merge _parse_tool_calls_from_content
    for ChatCompletions into this function
    Parse tool calls from content based on request tool_choice settings.

    Returns:
        A tuple of (function_calls, remaining_content) if tool calls
        were parsed
    """
    function_calls: list[FunctionCall] = []

    if request.tool_choice and isinstance(
        request.tool_choice,
        (ToolChoiceFunction, ChatCompletionNamedToolChoiceParam),
    ):
        # Forced Function Call
        if content is None:
            return [], None
        function_calls.append(
            FunctionCall(name=self._get_function_name(request), arguments=content)
        )
        return function_calls, None  # Clear content since tool is called.

    if request.tool_choice == "required":
        # Required tool calls - parse JSON
        tool_calls = []
        with contextlib.suppress(ValidationError):
            content = content or ""
            tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(
                content
            )
        for tool_call in tool_calls:
            function_calls.append(
                FunctionCall(
                    name=tool_call.name,
                    arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
                )
            )
        return function_calls, None  # Clear content since tool is called.

    if (
        self._tool_parser is not None
        and enable_auto_tools
        and (request.tool_choice == "auto" or request.tool_choice is None)
    ):
        # Automatic Tool Call Parsing
        tool_call_info = self._tool_parser.extract_tool_calls(
            content if content is not None else "",
            request=request,  # type: ignore
        )
        if tool_call_info is not None and tool_call_info.tools_called:
            function_calls.extend(
                FunctionCall(
                    id=tool_call.id,
                    name=tool_call.function.name,
                    arguments=tool_call.function.arguments,
                )
                for tool_call in tool_call_info.tool_calls
            )
            remaining_content = tool_call_info.content
            if remaining_content and remaining_content.strip() == "":
                remaining_content = None
            return function_calls, remaining_content

    # No tool calls
    return [], content

Parser ¶

Abstract Parser class that unifies ReasoningParser and ToolParser into a single interface for parsing model output.

This class provides a unified way to handle both reasoning extraction (e.g., chain-of-thought content in `<think>` tags) and tool call extraction (e.g., function calls in XML/JSON format) from model outputs.

Subclasses can either:

1. Override the abstract methods directly for custom parsing logic
2. Set the `reasoning_parser` and `tool_parser` properties to delegate to existing parser implementations

Class Attributes

- `reasoning_parser_cls`: The ReasoningParser class to use (for compatibility with code that needs the class, not an instance).
- `tool_parser_cls`: The ToolParser class to use (for compatibility with code that needs the class, not an instance).

Source code in vllm/parser/abstract_parser.py

class Parser:
    """
    Single entry point for parsing model output, combining the roles of a
    ReasoningParser and a ToolParser.

    Covers both reasoning extraction (e.g., chain-of-thought content in
    <think> tags) and tool call extraction (e.g., function calls in
    XML/JSON format).

    Subclasses can either:
    1. Override the abstract methods directly for custom parsing logic
    2. Set the `reasoning_parser` and `tool_parser` properties to delegate
       to existing parser implementations

    NOTE(review): several methods carry `@abstractmethod`, but this class
    lists no ABC base or metaclass, so Python will not block instantiation
    on their account; confirm that is intended.

    Class Attributes:
        reasoning_parser_cls: ReasoningParser class instantiated by
            `__init__` when it is not None (also available to code that
            needs the class rather than an instance).
        tool_parser_cls: ToolParser class instantiated by `__init__` when
            it is not None (also available to code that needs the class
            rather than an instance).
    """

    # Parser classes declared at class level; a subclass overrides these to
    # make `__init__` build the matching parser instances.
    reasoning_parser_cls: type[ReasoningParser] | None = None
    tool_parser_cls: type[ToolParser] | None = None

    def __init__(
        self,
        tokenizer: TokenizerLike,
        tools: list[Tool] | None = None,
        *args,
        **kwargs,
    ):
        """
        Store the tokenizer and build the sub-parsers named on the class.

        Args:
            tokenizer: Kept as `model_tokenizer` and passed to every
                sub-parser that gets constructed.
            tools: Passed to `tool_parser_cls` only.
            *args: Extra positional arguments, passed to
                `reasoning_parser_cls` only.
            **kwargs: Extra keyword arguments, passed to
                `reasoning_parser_cls` only.
        """
        self.model_tokenizer = tokenizer
        self._reasoning_parser: ReasoningParser | None = None
        self._tool_parser: ToolParser | None = None
        self._stream_state = StreamState()

        owner = self.__class__

        # The reasoning parser receives the caller's extra arguments ...
        reasoning_cls = owner.reasoning_parser_cls
        if reasoning_cls is not None:
            self._reasoning_parser = reasoning_cls(tokenizer, *args, **kwargs)

        # ... while the tool parser only receives the tokenizer and tools.
        tool_cls = owner.tool_parser_cls
        if tool_cls is not None:
            self._tool_parser = tool_cls(tokenizer, tools)

    @cached_property
    def vocab(self) -> dict[str, int]:
        """Token-to-ID mapping obtained from the tokenizer's `get_vocab()`."""
        token_to_id = self.model_tokenizer.get_vocab()
        return token_to_id

    @property
    def reasoning_parser(self) -> ReasoningParser | None:
        """The reasoning parser in use, or None when there is none."""
        return self._reasoning_parser

    @reasoning_parser.setter
    def reasoning_parser(self, parser: ReasoningParser | None) -> None:
        self._reasoning_parser = parser

    @property
    def tool_parser(self) -> ToolParser | None:
        """The tool parser in use, or None when there is none."""
        return self._tool_parser

    @tool_parser.setter
    def tool_parser(self, parser: ToolParser | None) -> None:
        self._tool_parser = parser

    # ========== Reasoning Parser Methods ==========

    @abstractmethod
    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """
        Report whether the reasoning section has ended within `input_ids`.

        Structured-output engines such as `xgrammar` use this to find out
        whether the model output has moved past its reasoning content.

        Args:
            input_ids: The token IDs of the model output.

        Returns:
            True if the reasoning content ends in the input_ids.
        """

    def is_reasoning_end_streaming(
        self, input_ids: list[int], delta_ids: list[int]
    ) -> bool:
        """
        Report whether the reasoning section ends during a decode step.

        This default does not look at `delta_ids`; it simply returns
        `is_reasoning_end(input_ids)`. Subclasses may override it to
        inspect the delta instead.

        Args:
            input_ids: The entire model output token IDs.
            delta_ids: The last few computed tokens at the current decode
                step (unused by this default).

        Returns:
            Whatever `is_reasoning_end(input_ids)` returns.
        """
        reasoning_finished = self.is_reasoning_end(input_ids)
        return reasoning_finished

    @abstractmethod
    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """
        Return the token IDs that belong to the non-reasoning content.

        For example, everything that follows the </think> tag.

        Args:
            input_ids: The token IDs of the model output.

        Returns:
            The extracted content token IDs.
        """

    @abstractmethod
    def extract_response_outputs(
        self,
        *,
        model_output: str,
        model_output_token_ids: Sequence[int],
        request: ResponsesRequest,
        enable_auto_tools: bool = False,
        tool_call_id_type: str = "random",
        logprobs: list[Logprob] | None = None,
    ) -> list[ResponseOutputItem]:
        """
        Turn a complete model-generated string into ResponseOutputItem
        objects covering reasoning, content, and tool calls.

        Intended for non-streaming responses, where the whole model
        response is available before anything is sent to the client.
        All arguments are keyword-only.

        Args:
            model_output: The complete model-generated string.
            model_output_token_ids: The token IDs of the model output.
            request: The request object used to generate the output.
            enable_auto_tools: Whether to enable automatic tool call parsing.
            tool_call_id_type: Type of tool call ID generation ("random", etc).
            logprobs: Pre-computed logprobs for the output text, if any.

        Returns:
            A list of ResponseOutputItem objects.
        """

    @abstractmethod
    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
    ) -> tuple[str | None, str | None]:
        """
        Split a complete model-generated string into reasoning and
        response content.

        Intended for non-streaming responses, where the whole model
        response is available before anything is sent to the client.

        Args:
            model_output: The complete model-generated string.
            request: The request object used to generate the output.

        Returns:
            A tuple of (reasoning, response_content); per the annotation,
            either element may be None.
        """

    @abstractmethod
    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        """
        Extract reasoning content from one streaming delta.

        Args:
            previous_text: Text from all previous tokens.
            current_text: Text including the current delta.
            delta_text: The new text in this delta.
            previous_token_ids: Token IDs from previous generation.
            current_token_ids: All token IDs including current.
            delta_token_ids: The new token IDs in this delta.

        Returns:
            A DeltaMessage with reasoning and/or content fields, or None.
        """

    # ========== Tool Parser Methods ==========

    def adjust_request(
        self, request: ChatCompletionRequest | ResponsesRequest
    ) -> ChatCompletionRequest | ResponsesRequest:
        """
        Hook for adjusting request parameters for tool calling.

        This default returns `request` unchanged. Subclasses can override
        it to modify request parameters (e.g., setting structured output
        schemas for tool calling).

        Args:
            request: The original request.

        Returns:
            The adjusted request; here, the same object that was passed in.
        """
        return request

    @abstractmethod
    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """
        Extract tool calls from a complete model-generated string.

        Intended for non-streaming responses.

        Args:
            model_output: The complete model-generated string.
            request: The request object used to generate the output.

        Returns:
            ExtractedToolCallInformation containing the tool calls.
        """

    @abstractmethod
    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """
        Extract tool calls from one streaming delta.

        Args:
            previous_text: Text from all previous tokens.
            current_text: Text including the current delta.
            delta_text: The new text in this delta.
            previous_token_ids: Token IDs from previous generation.
            current_token_ids: All token IDs including current.
            delta_token_ids: The new token IDs in this delta.
            request: The request object.

        Returns:
            A DeltaMessage with tool_calls field, or None.
        """

    # ========== Combined Parsing Methods ==========

    @abstractmethod
    def parse(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
        enable_auto_tools: bool = False,
    ) -> tuple[str | None, str | None, list[FunctionCall] | None]:
        """Parse a complete model output into reasoning, content and tool
        calls.

        Args:
            model_output: The complete model-generated string.
            request: The request object used to generate the output.
            enable_auto_tools: Whether to enable automatic tool call parsing.

        Returns:
            A tuple of (reasoning, content, tool_calls); per the annotation,
            each element may be None.
        """

    @abstractmethod
    def parse_delta(
        self,
        delta_text: str,
        delta_token_ids: list[int],
        request: ChatCompletionRequest | ResponsesRequest,
        prompt_token_ids: list[int] | None = None,
        *,
        finished: bool,
    ) -> DeltaMessage | None:
        """Parse a single streaming delta, orchestrating reasoning then
        tool call extraction via internal stream state.

        Args:
            delta_text: The new text in this delta.
            delta_token_ids: The new token IDs in this delta.
            request: The request object.
            prompt_token_ids: Optional prompt token IDs (presumably those of
                the request's prompt; confirm against implementations).
            finished: Required keyword-only flag (presumably marks the
                final delta of the stream; confirm against implementations).

        Returns:
            A DeltaMessage, or None.
        """

reasoning_parser `property` `writable` ¶

reasoning_parser: ReasoningParser | None

The underlying reasoning parser, if any.

tool_parser `property` `writable` ¶

tool_parser: ToolParser | None

The underlying tool parser, if any.

vocab `cached` `property` ¶

vocab: dict[str, int]

Get the vocabulary mapping from tokens to IDs.

adjust_request ¶

adjust_request(
    request: ChatCompletionRequest | ResponsesRequest,
) -> ChatCompletionRequest | ResponsesRequest

Adjust the request parameters for tool calling.

Can be overridden by subclasses to modify request parameters (e.g., setting structured output schemas for tool calling).

Parameters:

Name	Type	Description	Default
`request`	`ChatCompletionRequest \| ResponsesRequest`	The original request.	required

Returns:

Type	Description
`ChatCompletionRequest \| ResponsesRequest`	The adjusted request.

Source code in vllm/parser/abstract_parser.py

def adjust_request(
    self, request: ChatCompletionRequest | ResponsesRequest
) -> ChatCompletionRequest | ResponsesRequest:
    """
    Hook for adjusting request parameters for tool calling.

    This default implementation returns `request` unchanged. Subclasses
    can override it to modify request parameters (e.g., setting structured
    output schemas for tool calling).

    Args:
        request: The original request.

    Returns:
        The adjusted request; here, the same object that was passed in.
    """
    return request

extract_content_ids `abstractmethod` ¶

extract_content_ids(input_ids: list[int]) -> list[int]

Extract content token IDs from the input_ids.

This extracts the non-reasoning content (e.g., everything after the `</think>` tag).

Parameters:

Name	Type	Description	Default
`input_ids`	`list[int]`	The token IDs of the model output.	required

Returns:

Type	Description
`list[int]`	The extracted content token IDs.

Source code in vllm/parser/abstract_parser.py

@abstractmethod
def extract_content_ids(self, input_ids: list[int]) -> list[int]:
    """
    Return the token IDs that belong to the non-reasoning content.

    For example, everything that follows the </think> tag. This is an
    abstract declaration with no default behavior.

    Args:
        input_ids: The token IDs of the model output.

    Returns:
        The extracted content token IDs.
    """

extract_reasoning `abstractmethod` ¶

extract_reasoning(
    model_output: str,
    request: ChatCompletionRequest | ResponsesRequest,
) -> tuple[str | None, str | None]

Extract reasoning content from a complete model-generated string.

Used for non-streaming responses where we have the entire model response available before sending to the client.

Parameters:

Name	Type	Description	Default
`model_output`	`str`	The complete model-generated string.	required
`request`	`ChatCompletionRequest \| ResponsesRequest`	The request object used to generate the output.	required

Returns:

Type	Description
`tuple[str \| None, str \| None]`	A tuple of (reasoning, response_content).

Source code in vllm/parser/abstract_parser.py

@abstractmethod
def extract_reasoning(
    self,
    model_output: str,
    request: ChatCompletionRequest | ResponsesRequest,
) -> tuple[str | None, str | None]:
    """
    Split a complete model-generated string into reasoning and response
    content.

    Intended for non-streaming responses, where the whole model response
    is available before anything is sent to the client. This is an
    abstract declaration with no default behavior.

    Args:
        model_output: The complete model-generated string.
        request: The request object used to generate the output.

    Returns:
        A tuple of (reasoning, response_content); per the annotation,
        either element may be None.
    """

extract_reasoning_streaming `abstractmethod` ¶

extract_reasoning_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
) -> DeltaMessage | None

Extract reasoning content from a streaming delta message.

Parameters:

Name	Type	Description	Default
`previous_text`	`str`	Text from all previous tokens.	required
`current_text`	`str`	Text including the current delta.	required
`delta_text`	`str`	The new text in this delta.	required
`previous_token_ids`	`Sequence[int]`	Token IDs from previous generation.	required
`current_token_ids`	`Sequence[int]`	All token IDs including current.	required
`delta_token_ids`	`Sequence[int]`	The new token IDs in this delta.	required

Returns:

Type	Description
`DeltaMessage \| None`	A DeltaMessage with reasoning and/or content fields, or None.

Source code in vllm/parser/abstract_parser.py

@abstractmethod
def extract_reasoning_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
) -> DeltaMessage | None:
    """
    Extract reasoning content from one streaming delta.

    The same step is described twice, as text and as token IDs. This is
    an abstract declaration with no default behavior.

    Args:
        previous_text: Text from all previous tokens.
        current_text: Text including the current delta.
        delta_text: The new text in this delta.
        previous_token_ids: Token IDs from previous generation.
        current_token_ids: All token IDs including current.
        delta_token_ids: The new token IDs in this delta.

    Returns:
        A DeltaMessage with reasoning and/or content fields, or None.
    """

extract_response_outputs `abstractmethod` ¶

extract_response_outputs(
    *,
    model_output: str,
    model_output_token_ids: Sequence[int],
    request: ResponsesRequest,
    enable_auto_tools: bool = False,
    tool_call_id_type: str = "random",
    logprobs: list[Logprob] | None = None,
) -> list[ResponseOutputItem]

Extract reasoning, content, and tool calls from a complete model-generated string and return as ResponseOutputItem objects.

Used for non-streaming responses where we have the entire model response available before sending to the client.

Parameters:

Name	Type	Description	Default
`model_output`	`str`	The complete model-generated string.	required
`model_output_token_ids`	`Sequence[int]`	The token IDs of the model output.	required
`request`	`ResponsesRequest`	The request object used to generate the output.	required
`enable_auto_tools`	`bool`	Whether to enable automatic tool call parsing.	`False`
`tool_call_id_type`	`str`	Type of tool call ID generation ("random", etc).	`'random'`
`logprobs`	`list[Logprob] \| None`	Pre-computed logprobs for the output text, if any.	`None`

Returns:

Type	Description
`list[ResponseOutputItem]`	A list of ResponseOutputItem objects.

Source code in vllm/parser/abstract_parser.py

@abstractmethod
def extract_response_outputs(
    self,
    *,
    model_output: str,
    model_output_token_ids: Sequence[int],
    request: ResponsesRequest,
    enable_auto_tools: bool = False,
    tool_call_id_type: str = "random",
    logprobs: list[Logprob] | None = None,
) -> list[ResponseOutputItem]:
    """
    Turn a complete model-generated string into ResponseOutputItem objects
    covering reasoning, content, and tool calls.

    Intended for non-streaming responses, where the whole model response
    is available before anything is sent to the client. All arguments are
    keyword-only. This is an abstract declaration with no default behavior.

    Args:
        model_output: The complete model-generated string.
        model_output_token_ids: The token IDs of the model output.
        request: The request object used to generate the output.
        enable_auto_tools: Whether to enable automatic tool call parsing.
        tool_call_id_type: Type of tool call ID generation ("random", etc).
        logprobs: Pre-computed logprobs for the output text, if any.

    Returns:
        A list of ResponseOutputItem objects.
    """

extract_tool_calls `abstractmethod` ¶

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Extract tool calls from a complete model-generated string.

Used for non-streaming responses.

Parameters:

Name	Type	Description	Default
`model_output`	`str`	The complete model-generated string.	required
`request`	`ChatCompletionRequest`	The request object used to generate the output.	required

Returns:

Type	Description
`ExtractedToolCallInformation`	ExtractedToolCallInformation containing the tool calls.

Source code in vllm/parser/abstract_parser.py

@abstractmethod
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """
    Extract tool calls from a complete model-generated string.

    Intended for non-streaming responses. This is an abstract declaration
    with no default behavior.

    Args:
        model_output: The complete model-generated string.
        request: The request object used to generate the output.

    Returns:
        ExtractedToolCallInformation containing the tool calls.
    """

extract_tool_calls_streaming `abstractmethod` ¶

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> DeltaMessage | None

Extract tool calls from a streaming delta message.

Parameters:

Name	Type	Description	Default
`previous_text`	`str`	Text from all previous tokens.	required
`current_text`	`str`	Text including the current delta.	required
`delta_text`	`str`	The new text in this delta.	required
`previous_token_ids`	`Sequence[int]`	Token IDs from previous generation.	required
`current_token_ids`	`Sequence[int]`	All token IDs including current.	required
`delta_token_ids`	`Sequence[int]`	The new token IDs in this delta.	required
`request`	`ChatCompletionRequest`	The request object.	required

Returns:

Type	Description
`DeltaMessage \| None`	A DeltaMessage with tool_calls field, or None.

Source code in vllm/parser/abstract_parser.py

@abstractmethod
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> DeltaMessage | None:
    """
    Extract tool calls from one streaming delta.

    The same step is described twice, as text and as token IDs. This is
    an abstract declaration with no default behavior.

    Args:
        previous_text: Text from all previous tokens.
        current_text: Text including the current delta.
        delta_text: The new text in this delta.
        previous_token_ids: Token IDs from previous generation.
        current_token_ids: All token IDs including current.
        delta_token_ids: The new token IDs in this delta.
        request: The request object.

    Returns:
        A DeltaMessage with tool_calls field, or None.
    """

is_reasoning_end `abstractmethod` ¶

is_reasoning_end(input_ids: list[int]) -> bool

Check if the reasoning content ends in the input_ids.

Used by structured engines like xgrammar to check if the reasoning content ends in the model output.

Parameters:

Name	Type	Description	Default
`input_ids`	`list[int]`	The token IDs of the model output.	required

Returns:

Type	Description
`bool`	True if the reasoning content ends in the input_ids.

Source code in vllm/parser/abstract_parser.py

@abstractmethod
def is_reasoning_end(self, input_ids: list[int]) -> bool:
    """
    Report whether the reasoning section has ended within `input_ids`.

    Structured-output engines such as `xgrammar` use this to find out
    whether the model output has moved past its reasoning content. This
    is an abstract declaration with no default behavior.

    Args:
        input_ids: The token IDs of the model output.

    Returns:
        True if the reasoning content ends in the input_ids.
    """

is_reasoning_end_streaming ¶

is_reasoning_end_streaming(
    input_ids: list[int], delta_ids: list[int]
) -> bool

Check if the reasoning content ends during a decode step.

Parameters:

Name	Type	Description	Default
`input_ids`	`list[int]`	The entire model output token IDs.	required
`delta_ids`	`list[int]`	The last few computed tokens at the current decode step.	required

Returns:

Type	Description
`bool`	True if the reasoning content ends in the delta_ids.

Source code in vllm/parser/abstract_parser.py

def is_reasoning_end_streaming(
    self, input_ids: list[int], delta_ids: list[int]
) -> bool:
    """
    Report whether the reasoning section ends during a decode step.

    This default does not look at `delta_ids`; it simply returns
    `is_reasoning_end(input_ids)`. Subclasses may override it to inspect
    the delta instead.

    Args:
        input_ids: The entire model output token IDs.
        delta_ids: The last few computed tokens at the current decode step
            (unused by this default).

    Returns:
        Whatever `is_reasoning_end(input_ids)` returns.
    """
    reasoning_finished = self.is_reasoning_end(input_ids)
    return reasoning_finished

parse `abstractmethod` ¶

parse(
    model_output: str,
    request: ChatCompletionRequest | ResponsesRequest,
    enable_auto_tools: bool = False,
) -> tuple[
    str | None, str | None, list[FunctionCall] | None
]

Parse a complete model output, extracting reasoning and tool calls.

Parameters:

Name	Type	Description	Default
`model_output`	`str`	The complete model-generated string.	required
`request`	`ChatCompletionRequest \| ResponsesRequest`	The request object used to generate the output.	required
`enable_auto_tools`	`bool`	Whether to enable automatic tool call parsing.	`False`

Returns:

Type	Description
`tuple[str \| None, str \| None, list[FunctionCall] \| None]`	A tuple of (reasoning, content, tool_calls).

Source code in vllm/parser/abstract_parser.py

@abstractmethod
def parse(
    self,
    model_output: str,
    request: ChatCompletionRequest | ResponsesRequest,
    enable_auto_tools: bool = False,
) -> tuple[str | None, str | None, list[FunctionCall] | None]:
    """Parse a complete model output into reasoning, content and tool calls.

    This is an abstract declaration with no default behavior.

    Args:
        model_output: The complete model-generated string.
        request: The request object used to generate the output.
        enable_auto_tools: Whether to enable automatic tool call parsing.

    Returns:
        A tuple of (reasoning, content, tool_calls); per the annotation,
        each element may be None.
    """

parse_delta `abstractmethod` ¶

parse_delta(
    delta_text: str,
    delta_token_ids: list[int],
    request: ChatCompletionRequest | ResponsesRequest,
    prompt_token_ids: list[int] | None = None,
    *,
    finished: bool,
) -> DeltaMessage | None

Parse a single streaming delta, orchestrating reasoning then tool call extraction via internal stream state.

Source code in vllm/parser/abstract_parser.py

@abstractmethod
def parse_delta(
    self,
    delta_text: str,
    delta_token_ids: list[int],
    request: ChatCompletionRequest | ResponsesRequest,
    prompt_token_ids: list[int] | None = None,
    *,
    finished: bool,
) -> DeltaMessage | None:
    """Parse a single streaming delta, orchestrating reasoning then
    tool call extraction via internal stream state.

    This is an abstract declaration with no default behavior.

    Args:
        delta_text: The new text in this delta.
        delta_token_ids: The new token IDs in this delta.
        request: The request object.
        prompt_token_ids: Optional prompt token IDs (presumably those of
            the request's prompt; confirm against implementations).
        finished: Required keyword-only flag (presumably marks the final
            delta of the stream; confirm against implementations).

    Returns:
        A DeltaMessage, or None.
    """

ParserManager ¶

Provides a unified Parser by composing individual reasoning and tool parsers from their respective registries.

Source code in vllm/parser/parser_manager.py

class ParserManager:
    """
    Provides a unified Parser by composing individual reasoning and tool
    parsers from their respective registries.

    All methods are classmethods and return parser *classes*, not
    instances.
    """

    @classmethod
    def get_tool_parser(
        cls,
        tool_parser_name: str | None = None,
        enable_auto_tools: bool = False,
        model_name: str | None = None,
    ) -> type[ToolParser] | None:
        """
        Look up a tool parser class by name.

        Args:
            tool_parser_name: Name to look up in the tool parser registry.
            enable_auto_tools: Whether auto tool choice is enabled; when
                false, no lookup is performed.
            model_name: Model name, used only to warn about the
                "pythonic" parser with "meta-llama/Llama-3.2" models.

        Returns:
            The tool parser class, or None when `enable_auto_tools` is
            false or `tool_parser_name` is None.

        Raises:
            TypeError: If anything raises while looking the name up.
        """
        from vllm.tool_parsers import ToolParserManager

        parser: type[ToolParser] | None = None
        if not enable_auto_tools or tool_parser_name is None:
            return parser
        logger.info_once('"auto" tool choice has been enabled.')

        try:
            if (
                tool_parser_name == "pythonic"
                and model_name
                and model_name.startswith("meta-llama/Llama-3.2")
            ):
                logger.warning(
                    "Llama3.2 models may struggle to emit valid pythonic tool calls"
                )
            parser = ToolParserManager.get_tool_parser(tool_parser_name)
        except Exception as e:
            raise TypeError(
                "Error: --enable-auto-tool-choice requires "
                f"tool_parser:'{tool_parser_name}' which has not "
                "been registered"
            ) from e
        return parser

    @classmethod
    def get_reasoning_parser(
        cls,
        reasoning_parser_name: str | None,
    ) -> type[ReasoningParser] | None:
        """
        Look up a reasoning parser class by name.

        Args:
            reasoning_parser_name: Name to look up in the reasoning parser
                registry; None or an empty string means "no parser".

        Returns:
            The reasoning parser class, or None when no name was given.

        Raises:
            TypeError: If the lookup raises or yields None.
        """
        from vllm.reasoning import ReasoningParserManager

        if not reasoning_parser_name:
            return None

        not_registered = f"{reasoning_parser_name=} has not been registered"
        try:
            parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
        except Exception as e:
            raise TypeError(not_registered) from e
        # Checked explicitly instead of with `assert`: assertions are
        # removed under `python -O`, which would let None be returned for
        # a name that was actually requested.
        if parser is None:
            raise TypeError(not_registered)
        return parser

    @classmethod
    def get_parser(
        cls,
        tool_parser_name: str | None = None,
        reasoning_parser_name: str | None = None,
        enable_auto_tools: bool = False,
        model_name: str | None = None,
    ) -> type[Parser] | None:
        """
        Get a Parser that handles both reasoning and tool parsing.

        Composes individual reasoning and tool parsers into a single
        DelegatingParser subclass. A new subclass is created on every
        call that does not return None.

        Args:
            tool_parser_name: The name of the tool parser.
            reasoning_parser_name: The name of the reasoning parser.
            enable_auto_tools: Whether auto tool choice is enabled.
            model_name: The model name for parser-specific warnings.

        Returns:
            A Parser class, or None if neither name is given or neither
            lookup yields a parser class.

        Raises:
            TypeError: Propagated from `get_reasoning_parser` or
                `get_tool_parser` when a lookup fails.
        """
        if not tool_parser_name and not reasoning_parser_name:
            return None

        reasoning_parser_cls = cls.get_reasoning_parser(reasoning_parser_name)
        tool_parser_cls = cls.get_tool_parser(
            tool_parser_name, enable_auto_tools, model_name
        )

        # A tool parser name alone is not enough: get_tool_parser returns
        # None unless auto tool choice is enabled.
        if reasoning_parser_cls is None and tool_parser_cls is None:
            return None

        from vllm.parser.abstract_parser import DelegatingParser

        # Aliases are required: inside the class body below, the names
        # `reasoning_parser_cls` / `tool_parser_cls` are class-local, so
        # they could not refer to this function's variables of the same
        # name.
        r_cls = reasoning_parser_cls
        t_cls = tool_parser_cls

        class _Parser(DelegatingParser):
            reasoning_parser_cls = r_cls
            tool_parser_cls = t_cls

        return _Parser

get_parser `classmethod` ¶

get_parser(
    tool_parser_name: str | None = None,
    reasoning_parser_name: str | None = None,
    enable_auto_tools: bool = False,
    model_name: str | None = None,
) -> type[Parser] | None

Get a Parser that handles both reasoning and tool parsing.

Composes individual reasoning and tool parsers into a single DelegatingParser subclass.

Parameters:

Name	Type	Description	Default
`tool_parser_name`	`str \| None`	The name of the tool parser.	`None`
`reasoning_parser_name`	`str \| None`	The name of the reasoning parser.	`None`
`enable_auto_tools`	`bool`	Whether auto tool choice is enabled.	`False`
`model_name`	`str \| None`	The model name for parser-specific warnings.	`None`

Returns:

Type	Description
`type[Parser] \| None`	A Parser class, or None if neither parser is specified.

Source code in vllm/parser/parser_manager.py

@classmethod
def get_parser(
    cls,
    tool_parser_name: str | None = None,
    reasoning_parser_name: str | None = None,
    enable_auto_tools: bool = False,
    model_name: str | None = None,
) -> type[Parser] | None:
    """
    Build a Parser class covering both reasoning and tool parsing.

    The individual reasoning and tool parser classes are looked up and
    attached to a freshly created DelegatingParser subclass.

    Args:
        tool_parser_name: The name of the tool parser.
        reasoning_parser_name: The name of the reasoning parser.
        enable_auto_tools: Whether auto tool choice is enabled.
        model_name: The model name for parser-specific warnings.

    Returns:
        A Parser class, or None if neither name is given or neither
        lookup yields a parser class.
    """
    if not (tool_parser_name or reasoning_parser_name):
        return None

    resolved_reasoning_cls = cls.get_reasoning_parser(reasoning_parser_name)
    resolved_tool_cls = cls.get_tool_parser(
        tool_parser_name, enable_auto_tools, model_name
    )

    # Both lookups can come back empty even when a name was supplied.
    if resolved_reasoning_cls is None and resolved_tool_cls is None:
        return None

    from vllm.parser.abstract_parser import DelegatingParser

    # The locals are deliberately named differently from the class
    # attributes so the class body can read them from this function's scope.
    class _Parser(DelegatingParser):
        reasoning_parser_cls = resolved_reasoning_cls
        tool_parser_cls = resolved_tool_cls

    return _Parser

get_reasoning_parser `classmethod` ¶

get_reasoning_parser(
    reasoning_parser_name: str | None,
) -> type[ReasoningParser] | None

Get the reasoning parser based on the name.

Source code in vllm/parser/parser_manager.py

@classmethod
def get_reasoning_parser(
    cls,
    reasoning_parser_name: str | None,
) -> type[ReasoningParser] | None:
    """
    Look up a reasoning parser class by name.

    Args:
        reasoning_parser_name: Name to look up in the reasoning parser
            registry; None or an empty string means "no parser".

    Returns:
        The reasoning parser class, or None when no name was given.

    Raises:
        TypeError: If the lookup raises or yields None.
    """
    from vllm.reasoning import ReasoningParserManager

    if not reasoning_parser_name:
        return None

    not_registered = f"{reasoning_parser_name=} has not been registered"
    try:
        parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
    except Exception as e:
        raise TypeError(not_registered) from e
    # Checked explicitly instead of with `assert`: assertions are removed
    # under `python -O`, which would let None be returned for a name that
    # was actually requested.
    if parser is None:
        raise TypeError(not_registered)
    return parser

get_tool_parser `classmethod` ¶

get_tool_parser(
    tool_parser_name: str | None = None,
    enable_auto_tools: bool = False,
    model_name: str | None = None,
) -> type[ToolParser] | None

Get the tool parser based on the name.

Source code in vllm/parser/parser_manager.py

@classmethod
def get_tool_parser(
    cls,
    tool_parser_name: str | None = None,
    enable_auto_tools: bool = False,
    model_name: str | None = None,
) -> type[ToolParser] | None:
    """
    Look up a tool parser class by name.

    Args:
        tool_parser_name: Name to look up in the tool parser registry.
        enable_auto_tools: Whether auto tool choice is enabled; when
            false, no lookup is performed.
        model_name: Model name, used only to warn about the "pythonic"
            parser with "meta-llama/Llama-3.2" models.

    Returns:
        The tool parser class, or None when `enable_auto_tools` is false
        or `tool_parser_name` is None.

    Raises:
        TypeError: If anything raises while looking the name up.
    """
    from vllm.tool_parsers import ToolParserManager

    # Nothing to resolve unless auto tool choice is on and a name is given.
    if not (enable_auto_tools and tool_parser_name is not None):
        return None
    logger.info_once('"auto" tool choice has been enabled.')

    try:
        pythonic_on_llama32 = (
            tool_parser_name == "pythonic"
            and model_name
            and model_name.startswith("meta-llama/Llama-3.2")
        )
        if pythonic_on_llama32:
            logger.warning(
                "Llama3.2 models may struggle to emit valid pythonic tool calls"
            )
        resolved_cls = ToolParserManager.get_tool_parser(tool_parser_name)
    except Exception as e:
        # Any failure above is reported as an unregistered parser name.
        raise TypeError(
            "Error: --enable-auto-tool-choice requires "
            f"tool_parser:'{tool_parser_name}' which has not "
            "been registered"
        ) from e
    return resolved_cls

vllm.parser ¶

DelegatingParser ¶

_append_unstreamed_tool_args ¶

_parse_tool_calls ¶

Parser ¶

reasoning_parser property writable ¶

tool_parser property writable ¶

vocab cached property ¶

adjust_request ¶

extract_content_ids abstractmethod ¶

extract_reasoning abstractmethod ¶

extract_reasoning_streaming abstractmethod ¶

extract_response_outputs abstractmethod ¶

extract_tool_calls abstractmethod ¶

extract_tool_calls_streaming abstractmethod ¶

is_reasoning_end abstractmethod ¶

is_reasoning_end_streaming ¶

parse abstractmethod ¶

parse_delta abstractmethod ¶

ParserManager ¶

get_parser classmethod ¶

get_reasoning_parser classmethod ¶

get_tool_parser classmethod ¶

reasoning_parser `property` `writable` ¶

tool_parser `property` `writable` ¶

vocab `cached` `property` ¶

extract_content_ids `abstractmethod` ¶

extract_reasoning `abstractmethod` ¶

extract_reasoning_streaming `abstractmethod` ¶

extract_response_outputs `abstractmethod` ¶

extract_tool_calls `abstractmethod` ¶

extract_tool_calls_streaming `abstractmethod` ¶

is_reasoning_end `abstractmethod` ¶

parse `abstractmethod` ¶

parse_delta `abstractmethod` ¶

get_parser `classmethod` ¶

get_reasoning_parser `classmethod` ¶

get_tool_parser `classmethod` ¶