Module scenario.types

Expand source code
from enum import Enum
from pydantic import BaseModel, SkipValidation
from typing import (
    TYPE_CHECKING,
    Annotated,
    Any,
    Awaitable,
    Callable,
    List,
    Optional,
    Union,
)

from openai.types.chat import ChatCompletionMessageParam, ChatCompletionUserMessageParam

# Prevent circular imports + Pydantic breaking:
# ScenarioState is imported only while static type checkers analyse the module,
# so scenario.scenario_executor is never imported from here at runtime.
if TYPE_CHECKING:
    from scenario.scenario_executor import ScenarioState

    # Type checkers see the concrete class behind the alias
    ScenarioStateType = ScenarioState
else:
    # At runtime the alias is plain Any, so annotations that use it need no import
    ScenarioStateType = Any


class AgentRole(Enum):
    """
    Roles an agent can play while a scenario is running.

    The role attached to each agent identifies it during scenario execution, which lets
    the framework determine the order and interaction patterns between the different
    types of agents.

    Attributes:
        USER: A user simulator agent that generates user inputs
        AGENT: The agent under test, which responds to user inputs
        JUDGE: A judge agent that evaluates the conversation and determines success/failure
    """

    # Member values are the capitalized, human-readable role names
    USER = "User"
    AGENT = "Agent"
    JUDGE = "Judge"


class AgentInput(BaseModel):
    """
    Input data structure passed to agent adapters during scenario execution.

    This class encapsulates all the information an agent needs to generate its next response,
    including conversation history, thread context, and scenario state. It provides convenient
    methods to access the most recent user messages.

    Attributes:
        thread_id: Unique identifier for the conversation thread
        messages: Complete conversation history as OpenAI-compatible messages
        new_messages: Only the new messages since the agent's last call
        judgment_request: Whether this call is requesting a judgment from a judge agent
        scenario_state: Current state of the scenario execution

    Example:
        ```
        class MyAgent(AgentAdapter):
            async def call(self, input: AgentInput) -> str:
                # Get the latest user message
                user_msg = input.last_new_user_message_str()

                # Process with your LLM/agent
                response = await my_llm.complete(
                    messages=input.messages,
                    prompt=user_msg
                )

                return response
        ```
    """

    thread_id: str
    # Prevent pydantic from validating/parsing the messages and causing issues: https://github.com/pydantic/pydantic/issues/9541
    messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
    new_messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
    judgment_request: bool = False
    scenario_state: ScenarioStateType

    def last_new_user_message(self) -> ChatCompletionUserMessageParam:
        """
        Get the most recent user message from the new messages.

        Returns:
            The last user message in OpenAI message format

        Raises:
            ValueError: If no new user messages are found

        Example:
            ```
            user_message = input.last_new_user_message()
            content = user_message["content"]
            ```
        """
        user_messages = [m for m in self.new_messages if m["role"] == "user"]
        if not user_messages:
            raise ValueError(
                "No new user messages found, did you mean to call the assistant twice? Perhaps change your adapter to use the full messages list instead."
            )
        return user_messages[-1]

    def last_new_user_message_str(self) -> str:
        """
        Get the content of the most recent user message as a string.

        This is a convenience method for getting simple text content from user messages.
        For multimodal messages or complex content, use last_new_user_message() instead.

        Returns:
            The text content of the last user message

        Raises:
            ValueError: If no new user messages found or if the message content is not a string

        Example:
            ```
            user_text = input.last_new_user_message_str()
            response = f"You said: {user_text}"
            ```
        """
        content = self.last_new_user_message()["content"]
        # isinstance (instead of an exact type comparison) also accepts str subclasses,
        # which are perfectly valid text content.
        if not isinstance(content, str):
            raise ValueError(
                f"Last user message is not a string: {content!r}. Please use the full messages list instead."
            )
        return content


class ScenarioResult(BaseModel):
    """
    Final outcome of a scenario test execution.

    Bundles everything known about how a scenario went: whether it succeeded,
    the conversation that took place, and the reasoning about which criteria
    were met or failed.

    Attributes:
        success: Whether the scenario passed all criteria and completed successfully
        messages: Complete conversation history that occurred during the scenario
        reasoning: Detailed explanation of why the scenario succeeded or failed
        passed_criteria: List of success criteria that were satisfied
        failed_criteria: List of success criteria that were not satisfied
        total_time: Total execution time in seconds (if measured)
        agent_time: Time spent in agent calls in seconds (if measured)

    Example:
        ```
        result = await scenario.run(
            name="weather query",
            description="User asks about weather",
            agents=[
                weather_agent,
                scenario.UserSimulatorAgent(),
                scenario.JudgeAgent(criteria=["Agent provides helpful weather information"])
            ]
        )

        print(f"Test {'PASSED' if result.success else 'FAILED'}")
        print(f"Reasoning: {result.reasoning}")

        if not result.success:
            print("Failed criteria:")
            for criteria in result.failed_criteria:
                print(f"  - {criteria}")
        ```
    """

    success: bool
    # Skip validation right at the result level: message types can be slightly inconsistent, for example when coming from Gemini
    messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
    reasoning: Optional[str] = None
    passed_criteria: List[str] = []
    failed_criteria: List[str] = []
    total_time: Optional[float] = None
    agent_time: Optional[float] = None

    def __repr__(self) -> str:
        """
        Build a short, log-friendly description of the result.

        Returns:
            A string showing the success flag, a PASSED/FAILED status and the reasoning
        """
        if self.success:
            outcome = "PASSED"
        else:
            outcome = "FAILED"
        # A missing or empty reasoning is rendered as the literal text None
        explanation = self.reasoning or "None"
        return f"ScenarioResult(success={self.success}, status={outcome}, reasoning='{explanation}')"


# Every shape of value an agent adapter's call method is allowed to return:
# plain text, one message, a list of messages, or a final ScenarioResult.
AgentReturnTypes = Union[
    str, ChatCompletionMessageParam, List[ChatCompletionMessageParam], ScenarioResult
]
"""
Union type representing all valid return types for agent adapter call methods.

Agent adapters can return any of these types:

- str: Simple text response

- ChatCompletionMessageParam: Single OpenAI-compatible message

- List[ChatCompletionMessageParam]: Multiple OpenAI-compatible messages (for multi-step responses)

- ScenarioResult: Direct test result (typically used by judge agents to end scenarios)

Example:
    ```
    class MyAgent(AgentAdapter):
        async def call(self, input: AgentInput) -> AgentReturnTypes:
            # Can return a simple string
            return "Hello, how can I help you?"

            # Or a structured message
            return {"role": "assistant", "content": "Hello!"}

            # Or multiple messages for complex interactions
            return [
                {"role": "assistant", "content": "Let me search for that..."},
                {"role": "assistant", "content": "Here's what I found: ..."}
            ]
    ```
"""

# TODO: remove the optional ScenarioResult return type from here, use events instead
# "ScenarioState" is spelled as a string (forward reference) because the class is
# only imported under TYPE_CHECKING and is not available at runtime.
ScriptStep = Union[
    # Sync step that returns nothing
    Callable[["ScenarioState"], None],
    # Sync step that may return a ScenarioResult
    Callable[["ScenarioState"], Optional[ScenarioResult]],
    # Async as well
    Callable[["ScenarioState"], Awaitable[None]],
    Callable[["ScenarioState"], Awaitable[Optional[ScenarioResult]]],
]
"""
Union type for script step functions used in scenario scripts.

Script steps are functions that can be called during scenario execution to control
the flow, add custom assertions, or perform evaluations. They receive the current
scenario state and can optionally return a result to end the scenario.

The functions can be either synchronous or asynchronous.

Example:
    ```
    def check_tool_call(state: ScenarioState) -> None:
        assert state.has_tool_call("get_weather")

    async def custom_evaluation(state: ScenarioState) -> Optional[ScenarioResult]:
        eval_result = await some_external_evaluator(state.messages)
        if not eval_result.passed:
            return ScenarioResult(
                success=False,
                messages=state.messages,
                reasoning="Custom evaluation failed"
            )
        return None  # Continue scenario

    # Use in script
    result = await scenario.run(
        name="test",
        description="Test scenario",
        agents=[
            MyAgent(),
            scenario.UserSimulatorAgent(),
            scenario.JudgeAgent(criteria=["Agent provides helpful response"])
        ],
        script=[
            scenario.user("What's the weather?"),
            scenario.agent(),
            check_tool_call,
            custom_evaluation,
            scenario.succeed()
        ]
    )
    ```
"""

Global variables

var AgentReturnTypes

Union type representing all valid return types for agent adapter call methods.

Agent adapters can return any of these types:

  • str: Simple text response

  • ChatCompletionMessageParam: Single OpenAI-compatible message

  • List[ChatCompletionMessageParam]: Multiple OpenAI-compatible messages (for multi-step responses)

  • ScenarioResult: Direct test result (typically used by judge agents to end scenarios)

Example

class MyAgent(AgentAdapter):
    async def call(self, input: AgentInput) -> AgentReturnTypes:
        # Can return a simple string
        return "Hello, how can I help you?"

        # Or a structured message
        return {"role": "assistant", "content": "Hello!"}

        # Or multiple messages for complex interactions
        return [
            {"role": "assistant", "content": "Let me search for that..."},
            {"role": "assistant", "content": "Here's what I found: ..."}
        ]
var ScriptStep

Union type for script step functions used in scenario scripts.

Script steps are functions that can be called during scenario execution to control the flow, add custom assertions, or perform evaluations. They receive the current scenario state and can optionally return a result to end the scenario.

The functions can be either synchronous or asynchronous.

Example

def check_tool_call(state: ScenarioState) -> None:
    assert state.has_tool_call("get_weather")

async def custom_evaluation(state: ScenarioState) -> Optional[ScenarioResult]:
    eval_result = await some_external_evaluator(state.messages)
    if not eval_result.passed:
        return ScenarioResult(
            success=False,
            messages=state.messages,
            reasoning="Custom evaluation failed"
        )
    return None  # Continue scenario

# Use in script
result = await scenario.run(
    name="test",
    description="Test scenario",
    agents=[
        MyAgent(),
        scenario.UserSimulatorAgent(),
        scenario.JudgeAgent(criteria=["Agent provides helpful response"])
    ],
    script=[
        scenario.user("What's the weather?"),
        scenario.agent(),
        check_tool_call,
        custom_evaluation,
        scenario.succeed()
    ]
)

Classes

class AgentInput (**data: Any)

Input data structure passed to agent adapters during scenario execution.

This class encapsulates all the information an agent needs to generate its next response, including conversation history, thread context, and scenario state. It provides convenient methods to access the most recent user messages.

Attributes

thread_id
Unique identifier for the conversation thread
messages
Complete conversation history as OpenAI-compatible messages
new_messages
Only the new messages since the agent's last call
judgment_request
Whether this call is requesting a judgment from a judge agent
scenario_state
Current state of the scenario execution

Example

class MyAgent(AgentAdapter):
    async def call(self, input: AgentInput) -> str:
        # Get the latest user message
        user_msg = input.last_new_user_message_str()

        # Process with your LLM/agent
        response = await my_llm.complete(
            messages=input.messages,
            prompt=user_msg
        )

        return response

Create a new model by parsing and validating input data from keyword arguments.

Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Expand source code
class AgentInput(BaseModel):
    """
    Input data structure passed to agent adapters during scenario execution.

    This class encapsulates all the information an agent needs to generate its next response,
    including conversation history, thread context, and scenario state. It provides convenient
    methods to access the most recent user messages.

    Attributes:
        thread_id: Unique identifier for the conversation thread
        messages: Complete conversation history as OpenAI-compatible messages
        new_messages: Only the new messages since the agent's last call
        judgment_request: Whether this call is requesting a judgment from a judge agent
        scenario_state: Current state of the scenario execution

    Example:
        ```
        class MyAgent(AgentAdapter):
            async def call(self, input: AgentInput) -> str:
                # Get the latest user message
                user_msg = input.last_new_user_message_str()

                # Process with your LLM/agent
                response = await my_llm.complete(
                    messages=input.messages,
                    prompt=user_msg
                )

                return response
        ```
    """

    thread_id: str
    # Prevent pydantic from validating/parsing the messages and causing issues: https://github.com/pydantic/pydantic/issues/9541
    messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
    new_messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
    judgment_request: bool = False
    scenario_state: ScenarioStateType

    def last_new_user_message(self) -> ChatCompletionUserMessageParam:
        """
        Get the most recent user message from the new messages.

        Returns:
            The last user message in OpenAI message format

        Raises:
            ValueError: If no new user messages are found

        Example:
            ```
            user_message = input.last_new_user_message()
            content = user_message["content"]
            ```
        """
        user_messages = [m for m in self.new_messages if m["role"] == "user"]
        if not user_messages:
            raise ValueError(
                "No new user messages found, did you mean to call the assistant twice? Perhaps change your adapter to use the full messages list instead."
            )
        return user_messages[-1]

    def last_new_user_message_str(self) -> str:
        """
        Get the content of the most recent user message as a string.

        This is a convenience method for getting simple text content from user messages.
        For multimodal messages or complex content, use last_new_user_message() instead.

        Returns:
            The text content of the last user message

        Raises:
            ValueError: If no new user messages found or if the message content is not a string

        Example:
            ```
            user_text = input.last_new_user_message_str()
            response = f"You said: {user_text}"
            ```
        """
        content = self.last_new_user_message()["content"]
        # isinstance (instead of an exact type comparison) also accepts str subclasses,
        # which are perfectly valid text content.
        if not isinstance(content, str):
            raise ValueError(
                f"Last user message is not a string: {content!r}. Please use the full messages list instead."
            )
        return content

Ancestors

  • pydantic.main.BaseModel

Class variables

var judgment_request : bool

Whether this call is requesting a judgment from a judge agent. Defaults to `False`.

var messages : List[openai.types.chat.chat_completion_developer_message_param.ChatCompletionDeveloperMessageParam | openai.types.chat.chat_completion_system_message_param.ChatCompletionSystemMessageParam | openai.types.chat.chat_completion_user_message_param.ChatCompletionUserMessageParam | openai.types.chat.chat_completion_assistant_message_param.ChatCompletionAssistantMessageParam | openai.types.chat.chat_completion_tool_message_param.ChatCompletionToolMessageParam | openai.types.chat.chat_completion_function_message_param.ChatCompletionFunctionMessageParam]

Complete conversation history as OpenAI-compatible messages.

var model_config

Configuration for the Pydantic model (inherited from `pydantic.BaseModel`).

var new_messages : List[openai.types.chat.chat_completion_developer_message_param.ChatCompletionDeveloperMessageParam | openai.types.chat.chat_completion_system_message_param.ChatCompletionSystemMessageParam | openai.types.chat.chat_completion_user_message_param.ChatCompletionUserMessageParam | openai.types.chat.chat_completion_assistant_message_param.ChatCompletionAssistantMessageParam | openai.types.chat.chat_completion_tool_message_param.ChatCompletionToolMessageParam | openai.types.chat.chat_completion_function_message_param.ChatCompletionFunctionMessageParam]

Only the new messages since the agent's last call.

var scenario_state : Any

Current state of the scenario execution.

var thread_id : str

Unique identifier for the conversation thread.

Methods

def last_new_user_message(self) ‑> openai.types.chat.chat_completion_user_message_param.ChatCompletionUserMessageParam

Get the most recent user message from the new messages.

Returns

The last user message in OpenAI message format

Raises

ValueError
If no new user messages are found

Example

user_message = input.last_new_user_message()
content = user_message["content"]
Expand source code
def last_new_user_message(self) -> ChatCompletionUserMessageParam:
    """
    Return the latest message with role "user" among the new messages.

    Returns:
        The last user message in OpenAI message format

    Raises:
        ValueError: If the new messages contain no user message

    Example:
        ```
        user_message = input.last_new_user_message()
        content = user_message["content"]
        ```
    """
    # Keep only the user-authored entries, preserving their original order
    from_user = list(filter(lambda msg: msg["role"] == "user", self.new_messages))
    if from_user:
        return from_user[-1]
    raise ValueError(
        "No new user messages found, did you mean to call the assistant twice? Perhaps change your adapter to use the full messages list instead."
    )
def last_new_user_message_str(self) ‑> str

Get the content of the most recent user message as a string.

This is a convenience method for getting simple text content from user messages. For multimodal messages or complex content, use last_new_user_message() instead.

Returns

The text content of the last user message

Raises

ValueError
If no new user messages found or if the message content is not a string

Example

user_text = input.last_new_user_message_str()
response = f"You said: {user_text}"
Expand source code
def last_new_user_message_str(self) -> str:
    """
    Get the content of the most recent user message as a string.

    This is a convenience method for getting simple text content from user messages.
    For multimodal messages or complex content, use last_new_user_message() instead.

    Returns:
        The text content of the last user message

    Raises:
        ValueError: If no new user messages found or if the message content is not a string

    Example:
        ```
        user_text = input.last_new_user_message_str()
        response = f"You said: {user_text}"
        ```
    """
    content = self.last_new_user_message()["content"]
    # isinstance (instead of an exact type comparison) also accepts str subclasses,
    # which are perfectly valid text content.
    if not isinstance(content, str):
        raise ValueError(
            f"Last user message is not a string: {content!r}. Please use the full messages list instead."
        )
    return content
class AgentRole (*args, **kwds)

Defines the different roles that agents can play in a scenario.

This enum is used to identify the role of each agent during scenario execution, enabling the framework to determine the order and interaction patterns between different types of agents.

Attributes

USER
Represents a user simulator agent that generates user inputs
AGENT
Represents the agent under test that responds to user inputs
JUDGE
Represents a judge agent that evaluates the conversation and determines success/failure
Expand source code
class AgentRole(Enum):
    """
    Roles an agent can play while a scenario is running.

    The role attached to each agent identifies it during scenario execution, which lets
    the framework determine the order and interaction patterns between the different
    types of agents.

    Attributes:
        USER: A user simulator agent that generates user inputs
        AGENT: The agent under test, which responds to user inputs
        JUDGE: A judge agent that evaluates the conversation and determines success/failure
    """

    # Member values are the capitalized, human-readable role names
    USER = "User"
    AGENT = "Agent"
    JUDGE = "Judge"

Ancestors

  • enum.Enum

Class variables

var AGENT

Represents the agent under test that responds to user inputs.

var JUDGE

Represents a judge agent that evaluates the conversation and determines success/failure.

var USER

Represents a user simulator agent that generates user inputs.

class ScenarioResult (**data: Any)

Represents the final result of a scenario test execution.

This class contains all the information about how a scenario performed, including whether it succeeded, the conversation that took place, and detailed reasoning about which criteria were met or failed.

Attributes

success
Whether the scenario passed all criteria and completed successfully
messages
Complete conversation history that occurred during the scenario
reasoning
Detailed explanation of why the scenario succeeded or failed
passed_criteria
List of success criteria that were satisfied
failed_criteria
List of success criteria that were not satisfied
total_time
Total execution time in seconds (if measured)
agent_time
Time spent in agent calls in seconds (if measured)

Example

result = await scenario.run(
    name="weather query",
    description="User asks about weather",
    agents=[
        weather_agent,
        scenario.UserSimulatorAgent(),
        scenario.JudgeAgent(criteria=["Agent provides helpful weather information"])
    ]
)

print(f"Test {'PASSED' if result.success else 'FAILED'}")
print(f"Reasoning: {result.reasoning}")

if not result.success:
    print("Failed criteria:")
    for criteria in result.failed_criteria:
        print(f"  - {criteria}")

Create a new model by parsing and validating input data from keyword arguments.

Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Expand source code
class ScenarioResult(BaseModel):
    """
    Final outcome of a scenario test execution.

    Bundles everything known about how a scenario went: whether it succeeded,
    the conversation that took place, and the reasoning about which criteria
    were met or failed.

    Attributes:
        success: Whether the scenario passed all criteria and completed successfully
        messages: Complete conversation history that occurred during the scenario
        reasoning: Detailed explanation of why the scenario succeeded or failed
        passed_criteria: List of success criteria that were satisfied
        failed_criteria: List of success criteria that were not satisfied
        total_time: Total execution time in seconds (if measured)
        agent_time: Time spent in agent calls in seconds (if measured)

    Example:
        ```
        result = await scenario.run(
            name="weather query",
            description="User asks about weather",
            agents=[
                weather_agent,
                scenario.UserSimulatorAgent(),
                scenario.JudgeAgent(criteria=["Agent provides helpful weather information"])
            ]
        )

        print(f"Test {'PASSED' if result.success else 'FAILED'}")
        print(f"Reasoning: {result.reasoning}")

        if not result.success:
            print("Failed criteria:")
            for criteria in result.failed_criteria:
                print(f"  - {criteria}")
        ```
    """

    success: bool
    # Skip validation right at the result level: message types can be slightly inconsistent, for example when coming from Gemini
    messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
    reasoning: Optional[str] = None
    passed_criteria: List[str] = []
    failed_criteria: List[str] = []
    total_time: Optional[float] = None
    agent_time: Optional[float] = None

    def __repr__(self) -> str:
        """
        Build a short, log-friendly description of the result.

        Returns:
            A string showing the success flag, a PASSED/FAILED status and the reasoning
        """
        if self.success:
            outcome = "PASSED"
        else:
            outcome = "FAILED"
        # A missing or empty reasoning is rendered as the literal text None
        explanation = self.reasoning or "None"
        return f"ScenarioResult(success={self.success}, status={outcome}, reasoning='{explanation}')"

Ancestors

  • pydantic.main.BaseModel

Class variables

var agent_time : float | None

Time spent in agent calls in seconds (if measured). Defaults to `None`.

var failed_criteria : List[str]

List of success criteria that were not satisfied. Defaults to an empty list.

var messages : List[openai.types.chat.chat_completion_developer_message_param.ChatCompletionDeveloperMessageParam | openai.types.chat.chat_completion_system_message_param.ChatCompletionSystemMessageParam | openai.types.chat.chat_completion_user_message_param.ChatCompletionUserMessageParam | openai.types.chat.chat_completion_assistant_message_param.ChatCompletionAssistantMessageParam | openai.types.chat.chat_completion_tool_message_param.ChatCompletionToolMessageParam | openai.types.chat.chat_completion_function_message_param.ChatCompletionFunctionMessageParam]

Complete conversation history that occurred during the scenario.

var model_config

Configuration for the Pydantic model (inherited from `pydantic.BaseModel`).

var passed_criteria : List[str]

List of success criteria that were satisfied. Defaults to an empty list.

var reasoning : str | None

Detailed explanation of why the scenario succeeded or failed. Defaults to `None`.

var success : bool

Whether the scenario passed all criteria and completed successfully.

var total_time : float | None

Total execution time in seconds (if measured). Defaults to `None`.