class Olmo3PythonicToolParser(ToolParser):
    """
    Tool call parser for Olmo 3 models that produce tool calls as
    newline-separated pythonic strings, optionally wrapped in
    ``<function_calls>...</function_calls>`` tags.

    Used when --enable-auto-tool-choice and --tool-call-parser are both set.
    NOTE(review): the text this docstring was copied from names the
    ``pythonic`` parser; confirm the name this class is registered under.

    Code copied from pythonic_tool_parser.py and updated to handle
    - newline separated pythonic tool calls.
    - argument values being null/true/false instead of Pythonic literals.
    """

    # TODO(mdepinet): Possible future improvements:
    #   1. Support text + tools separated by either <|python_tag|> or \n\n
    #   2. Support tools outside of a list (or separated by a semicolon).
    #      This depends on item 1 for consistent streaming.
    # Neither of these are necessary for e.g. ToolACE, but both would help make
    # Llama3.2 models more reliable.

    # Matches a bracketed, comma-separated list of `name(arg=value, ...)` calls.
    TOOL_CALL_REGEX = re.compile(
        r"\[([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s)?\),\s*)*([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s*)?\)\s*)+\]",
        re.DOTALL,
    )

    # XML-style wrapper the model places around its tool calls.
    _FUNCTION_CALLS_OPEN_TAG = "<function_calls>"
    _FUNCTION_CALLS_CLOSE_TAG = "</function_calls>"
    _FUNCTION_CALLS_REGEX = re.compile(
        r"<function_calls>(.*?)</function_calls>", re.DOTALL
    )

    def __init__(self, tokenizer: PreTrainedTokenizerBase):
        super().__init__(tokenizer)

    # Rename for readability. This is NOT a tool id.
    @property
    def current_tool_index(self) -> int:
        """Alias for ``self.current_tool_id``."""
        return self.current_tool_id

    @current_tool_index.setter
    def current_tool_index(self, value: int) -> None:
        self.current_tool_id = value

    @staticmethod
    def _as_list_source(calls_text: str) -> str:
        """Turn newline-separated calls into the source of a Python list.

        Every line is stripped, blank lines are dropped, and the remaining
        lines are joined with ``", "`` and wrapped in square brackets.
        """
        stripped_lines = (line.strip() for line in calls_text.splitlines())
        return "[" + ", ".join(line for line in stripped_lines if line) + "]"

    def extract_tool_calls(
        self, model_output: str, request: ChatCompletionRequest
    ) -> ExtractedToolCallInformation:
        """
        Extract the tool calls from a complete model response.

        Args:
            model_output: The full text produced by the model.
            request: The originating request (not read by this method).

        Returns:
            ``tools_called=True`` with the parsed calls and ``content=None``
            when the output is a sequence of function calls; otherwise
            ``tools_called=False`` with the untouched model output as content.
        """
        original_model_output = model_output

        # Remove xml tags.
        match = self._FUNCTION_CALLS_REGEX.search(model_output)
        if match:
            model_output = match.group(1).strip()

        # Make the newline separated function calls into a list.
        model_output = self._as_list_source(model_output)

        is_tool_call_pattern = False
        try:
            is_tool_call_pattern = (
                self.TOOL_CALL_REGEX.match(
                    model_output, timeout=envs.VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS
                )
                is not None
            )
        except TimeoutError:
            # A timed-out match is treated the same as "not a tool call".
            logger.warning("Regex timeout occurred when matching tool call pattern.")
            logger.debug(
                "Regex timeout occurred when matching user input: %s", model_output
            )

        if not is_tool_call_pattern:
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=original_model_output
            )

        try:
            module = ast.parse(model_output)
            parsed = getattr(module.body[0], "value", None)
            if isinstance(parsed, ast.List) and all(
                isinstance(e, ast.Call) for e in parsed.elts
            ):
                return ExtractedToolCallInformation(
                    tools_called=True,
                    tool_calls=[
                        _handle_single_tool(e)  # type: ignore
                        for e in parsed.elts
                    ],
                    content=None,
                )
            else:
                raise _UnexpectedAstError(
                    "Tool output must be a list of function calls"
                )
        except Exception:
            logger.exception("Error in extracting tool call from response.")
            # Treat as regular text
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=original_model_output
            )

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """
        Produce the streaming delta for the text generated so far.

        Only ``current_text`` and ``delta_text`` are read; the remaining
        parameters are accepted but not used by this method.

        Returns:
            - A content delta carrying ``delta_text`` when the output does not
              start with ``<`` (i.e. it is not a tool call).
            - A delta with the newly available tool-call fragments, if any.
            - An empty content delta once there is nothing left to complete
              and ``current_tool_index`` is positive, so that finish_reason
              gets set.
            - ``None`` when the chunk yields nothing to emit or cannot be
              parsed.
        """
        # All function calls start with the <function_calls> tag.
        # But since this is streaming, we may have seen only part of the tag.
        if not current_text.startswith("<"):
            return DeltaMessage(content=delta_text)

        open_tag = self._FUNCTION_CALLS_OPEN_TAG
        if len(current_text) < len(open_tag) and open_tag.startswith(current_text):
            # Only a fragment of the opening tag has arrived. It can never
            # parse as a call, so wait for more text instead of attempting a
            # parse that is bound to fail and log a traceback.
            return None

        try:
            # Remove xml tags.
            current_text = current_text.removeprefix(open_tag)
            current_text = current_text.removesuffix(self._FUNCTION_CALLS_CLOSE_TAG)

            # `_make_valid_python` yields None when it has no result for the
            # text so far, otherwise a (valid_text, added_text) pair.
            valid_and_added_text = _make_valid_python(current_text)
            if valid_and_added_text is None:
                return None
            valid_text, added_text = valid_and_added_text

            # Make the newline separated function calls into a list.
            valid_text = self._as_list_source(valid_text)
            module = ast.parse(valid_text)
            parsed = getattr(module.body[0], "value", None)
            if not isinstance(parsed, ast.List) or not all(
                isinstance(e, ast.Call) for e in parsed.elts
            ):
                raise _UnexpectedAstError(
                    "Tool output must be a sequence of newline-separated calls"
                )
            tool_calls = [
                _handle_single_tool(e)  # type: ignore
                for e in parsed.elts
            ]

            tool_deltas = []
            last_index = len(tool_calls) - 1
            for index, new_call in enumerate(tool_calls):
                # Skip calls positioned before the current tool index.
                if index < self.current_tool_index:
                    continue

                self.current_tool_index = index
                if len(self.streamed_args_for_tool) == index:
                    self.streamed_args_for_tool.append("")

                # A call is complete if another call follows it, or if no
                # closing parenthesis had to be added to make the text valid.
                new_call_complete = index < last_index or ")" not in added_text
                if new_call_complete:
                    self.current_tool_index += 1

                withheld_suffix = added_text[:-1] if not new_call_complete else ""
                if not new_call_complete and added_text[-1] == ")":
                    # Function call is incomplete. Withhold the closing bracket.
                    withheld_suffix = withheld_suffix + "}"
                # Strings get single quotes in the model-produced string.
                # JSON requires double quotes.
                withheld_suffix = withheld_suffix.replace("'", '"')
                delta = _compute_tool_delta(
                    self.streamed_args_for_tool[index], new_call, index, withheld_suffix
                )

                if delta is not None:
                    tool_deltas.append(delta)
                    if (
                        delta.function is not None
                        and delta.function.arguments is not None
                    ):
                        self.streamed_args_for_tool[index] += delta.function.arguments

            # HACK: serving_chat.py inspects the internal state of tool parsers
            # when determining its final streaming delta, automatically
            # adding autocompleted JSON.
            # These two lines avoid that nonsense while ensuring finish_reason
            # is set to tool_calls when at least one tool is called.
            if tool_deltas and not self.prev_tool_call_arr:
                self.prev_tool_call_arr = [{"arguments": {}}]

            if tool_deltas:
                return DeltaMessage(tool_calls=tool_deltas)
            elif not added_text and self.current_tool_index > 0:
                # Return an empty DeltaMessage once the tool calls are all done
                # so that finish_reason gets set.
                return DeltaMessage(content="")
            else:
                return None
        except Exception:
            logger.exception("Error trying to handle streaming tool call.")
            logger.debug(
                "Skipping chunk as a result of tool streaming extraction error"
            )
            return None