Task configuration and evaluation for Plato environments
## PlatoTask
```python
from plato.models import PlatoTask

task = PlatoTask(
    name: str,                # Task identifier
    prompt: str,              # Task instructions
    start_url: str,           # Initial URL
    eval_config: EvalConfig,  # Evaluation configuration
    extra: dict = {}          # Additional task-specific data
)
```
```python
task = PlatoTask(
    name="order_pizza",
    prompt="Order a large pepperoni pizza from DoorDash",
    start_url="https://doordash.com",
    eval_config=CustomEvalConfig(
        score_fn=your_eval_function
    )
)
```
```python
async def evaluate_order(state: dict) -> tuple[bool, str]:
    """Custom evaluation function for a DoorDash order."""
    try:
        # Check order status
        order_complete = state.get("order_status") == "complete"
        cart_items = state.get("cart", [])

        # Verify order contents
        has_pizza = any(
            "pepperoni" in item.get("name", "").lower()
            for item in cart_items
        )

        if order_complete and has_pizza:
            return True, "Order completed successfully"
        return False, "Order incomplete or missing items"
    except Exception as e:
        return False, f"Evaluation error: {str(e)}"

# Create task with custom evaluation
task = PlatoTask(
    name="order_pizza",
    prompt="Order a large pepperoni pizza",
    start_url="https://doordash.com",
    eval_config=CustomEvalConfig(
        score_fn=evaluate_order
    )
)
```
```python
CustomEvalConfig(
    score_fn=callable  # Custom evaluation function
)
```
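Because `score_fn` is just a callable, one option is to build evaluation functions from a small factory rather than writing one function per task. This is a minimal sketch, assuming the `(state) -> tuple[bool, str]` contract from the example above; `make_keyword_check` is a hypothetical helper, and the `"cart"` key follows the earlier DoorDash example.

```python
# Sketch: parameterized eval functions from a factory.
# Assumes score_fn receives the state dict and returns (bool, str),
# as in the example above. make_keyword_check is hypothetical.
def make_keyword_check(keyword: str):
    async def check(state: dict) -> tuple[bool, str]:
        # "cart" mirrors the task-specific data used in the earlier example
        items = state.get("cart", [])
        if any(keyword in item.get("name", "").lower() for item in items):
            return True, f"Found '{keyword}' item in cart"
        return False, f"No '{keyword}' item in cart"
    return check

eval_config = CustomEvalConfig(score_fn=make_keyword_check("pepperoni"))
```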
{ "url": str, # Current page URL "title": str, # Page title "content": str, # Page content "elements": List[dict], # Interactive elements "mutations": List[dict], # State changes "custom": dict, # Task-specific data }
```python
# Create environment with task
env = client.make_environment("doordash")
await env.reset(task=task)

# Run task and evaluate
result = await env.evaluate()

print(f"Task succeeded: {result.success}")
if not result.success:
    print(f"Reason: {result.reason}")
```
```python
# Load tasks for a specific environment
tasks = await client.load_tasks("doordash")

# Use the first available task
task = tasks[0]
await env.reset(task=task)
```
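Putting the pieces together, a simple harness might load every task for an environment and evaluate each in turn. This is a sketch using only the calls shown above; `run_agent` is a hypothetical placeholder for whatever drives your agent between reset and evaluation.

```python
# Sketch: evaluate every task available for an environment.
# Uses only the client/env calls shown above; run_agent() is a
# hypothetical placeholder for your agent loop.
async def evaluate_all(client, env_name: str) -> None:
    env = client.make_environment(env_name)
    tasks = await client.load_tasks(env_name)

    for task in tasks:
        await env.reset(task=task)
        # await run_agent(env, task)  # hypothetical: drive the agent here
        result = await env.evaluate()
        status = "PASS" if result.success else f"FAIL ({result.reason})"
        print(f"{task.name}: {status}")
```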