PlatoTask
The PlatoTask class defines the structure and evaluation criteria for tasks in Plato environments.
Task Structure
from plato.models import PlatoTask

task = PlatoTask(
    name: str,                # Task identifier
    prompt: str,              # Task instructions
    start_url: str,           # Initial URL
    eval_config: EvalConfig,  # Evaluation configuration
    extra: dict = {}          # Additional task-specific data
)
Creating Tasks
Basic Task
task = PlatoTask(
    name="order_pizza",
    prompt="Order a large pepperoni pizza from DoorDash",
    start_url="https://doordash.com",
    eval_config=CustomEvalConfig(
        score_fn=your_eval_function
    )
)
Custom Evaluation
async def evaluate_order(state: dict) -> tuple[bool, str]:
    """Custom evaluation function for a DoorDash order.

    Args:
        state: Current environment state snapshot (dict of page/order data).

    Returns:
        A ``(success, reason)`` tuple: ``success`` is True only when the
        order is complete and the cart contains a pepperoni item.
    """
    try:
        # Check order status
        order_complete = state.get("order_status") == "complete"
        cart_items = state.get("cart", [])
        # Verify order contents.
        # FIX: str has no .contains() method — the original call raised
        # AttributeError, which the except clause silently turned into a
        # failed evaluation. Use the `in` substring operator instead.
        has_pizza = any(
            "pepperoni" in item.get("name", "").lower()
            for item in cart_items
        )
        if order_complete and has_pizza:
            return True, "Order completed successfully"
        return False, "Order incomplete or missing items"
    except Exception as e:
        # Never let the evaluator crash: report the failure as a reason string.
        return False, f"Evaluation error: {str(e)}"
# Create task with custom evaluation
task = PlatoTask(
    name="order_pizza",
    prompt="Order a large pepperoni pizza",
    start_url="https://doordash.com",
    eval_config=CustomEvalConfig(
        score_fn=evaluate_order
    )
)
Evaluation Configuration
Custom Evaluation

CustomEvalConfig(
    score_fn=callable
)
Define custom evaluation logic
State Access Evaluation functions receive current environment state for verification
State Structure
The state object passed to evaluation functions contains:
{
    "url": str,               # Current page URL
    "title": str,             # Page title
    "content": str,           # Page content
    "elements": List[dict],   # Interactive elements
    "mutations": List[dict],  # State changes
    "custom": dict,           # Task-specific data
}
Using Tasks
With Environment
# Create environment with task
env = client.make_environment("doordash")
await env.reset(task=task)

# Run task and evaluate
result = await env.evaluate()
print(f"Task succeeded: {result.success}")
if not result.success:
    print(f"Reason: {result.reason}")
Loading Predefined Tasks
# Load tasks for specific environment
tasks = await client.load_tasks("doordash")

# Use first available task
task = tasks[0]
await env.reset(task=task)
Evaluation functions should be deterministic and handle all possible state scenarios to ensure reliable task verification.