Production Agents
Build reliable, observable, and safe AI agents ready for production deployment.
Production Challenges
Building an agent that works in a demo is different from one that works reliably in production:
- Reliability: LLMs hallucinate and make mistakes
- Cost: Unbounded loops can get expensive
- Latency: Agent loops can be slow
- Observability: Hard to debug what happened
- Safety: Prevent destructive actions
Key Production Practices
Guardrails
- Limit maximum iterations
- Validate tool inputs before execution
- Human-in-the-loop for dangerous actions
- Rate limiting
Observability
- Log every LLM call and tool use
- Track token usage and cost
- Trace full agent execution paths
Reliability
- Retry failed tool calls
- Fallback strategies
- Timeout handling
Example
python
import logging
import time
from dataclasses import dataclass, field
from typing import Callable, Optional

from anthropic import Anthropic
# Timestamped, leveled log lines so every agent event is traceable in production logs.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Module-level API client shared by all agents in this process.
# NOTE(review): Anthropic() presumably reads credentials from the environment — confirm deployment config.
client = Anthropic()
@dataclass
class AgentConfig:
    """Guardrail settings for a ProductionAgent run."""

    # Hard cap on agent-loop iterations; prevents unbounded (and costly) loops.
    max_iterations: int = 10
    # Upper bound on output tokens per individual LLM call.
    max_tokens_per_call: int = 2048
    # Wall-clock budget for the entire run, in seconds.
    timeout_seconds: float = 60.0
    # Tool names that require human confirmation before execution
    # (only enforced when require_confirmation is True).
    dangerous_tools: list = field(default_factory=list)
    # When True, prompt a human before running any tool in dangerous_tools.
    require_confirmation: bool = False
@dataclass
class AgentTrace:
    """Observability record accumulated over a single agent run:
    iteration count, token usage, tool calls, and errors."""

    # Per-token USD prices used for the cost estimate.
    # NOTE(review): keep in sync with the model actually called — update when pricing changes.
    INPUT_TOKEN_COST = 0.000001
    OUTPUT_TOKEN_COST = 0.000003

    iterations: int = 0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    # One entry per tool invocation: {"tool": name, "input": payload}.
    tool_calls: list = field(default_factory=list)
    # String representations of LLM-call and tool-call failures.
    errors: list = field(default_factory=list)

    @property
    def estimated_cost(self) -> float:
        """Rough USD cost of all LLM calls recorded so far."""
        return (self.total_input_tokens * self.INPUT_TOKEN_COST
                + self.total_output_tokens * self.OUTPUT_TOKEN_COST)
class ProductionAgent:
    """Agent loop with production guardrails: iteration caps, a wall-clock
    timeout, token/cost tracking, structured logging, and optional
    human-in-the-loop confirmation for dangerous tools."""

    def __init__(self, config: Optional[AgentConfig] = None):
        """Create an agent; falls back to default AgentConfig when none is given."""
        self.config = config or AgentConfig()
        self.trace = AgentTrace()

    def run(self, task: str, tools: list, tool_dispatcher: Callable) -> Optional[str]:
        """Run the tool-use loop until the model finishes or a guardrail trips.

        Args:
            task: Natural-language task for the model.
            tools: Tool schemas passed straight to the Messages API.
            tool_dispatcher: Called as tool_dispatcher(name, input) to execute
                a tool; its exceptions are reported back to the model.

        Returns:
            The model's final text, or a human-readable failure message when a
            guardrail (timeout, iteration cap, API error) stops the run.
        """
        messages = [{"role": "user", "content": task}]
        start_time = time.time()
        logger.info(f"Agent starting task: {task[:100]}...")
        while self.trace.iterations < self.config.max_iterations:
            # Wall-clock guardrail covering the whole run, not a single call.
            if time.time() - start_time > self.config.timeout_seconds:
                logger.warning("Agent timed out")
                return "Task timed out. Please try a simpler request."
            self.trace.iterations += 1
            logger.info(f"Iteration {self.trace.iterations}/{self.config.max_iterations}")
            try:
                response = client.messages.create(
                    model="claude-3-5-haiku-20241022",
                    max_tokens=self.config.max_tokens_per_call,
                    tools=tools,
                    messages=messages
                )
            except Exception as e:
                # Surface API failures to the caller instead of crashing the loop.
                self.trace.errors.append(str(e))
                logger.error(f"LLM call failed: {e}")
                return f"Agent encountered an error: {e}"
            # Token accounting feeds the cost estimate in the trace.
            self.trace.total_input_tokens += response.usage.input_tokens
            self.trace.total_output_tokens += response.usage.output_tokens
            messages.append({"role": "assistant", "content": response.content})
            if response.stop_reason == "end_turn":
                # Model is done: return its first text block (empty string if none).
                result = next((b.text for b in response.content if hasattr(b, "text")), "")
                logger.info(f"Task complete. Cost: ~${self.trace.estimated_cost:.4f}")
                return result
            results = []
            for block in response.content:
                if block.type != "tool_use":
                    continue
                self.trace.tool_calls.append({"tool": block.name, "input": block.input})
                logger.info(f"Tool call: {block.name}({block.input})")
                # Human-in-the-loop gate for tools flagged as dangerous.
                if block.name in self.config.dangerous_tools and self.config.require_confirmation:
                    confirm = input(f"Confirm tool call {block.name}? [y/N]: ")
                    if confirm.lower() != 'y':
                        results.append({"type": "tool_result", "tool_use_id": block.id,
                                        "content": "Action cancelled by user"})
                        continue
                try:
                    output = tool_dispatcher(block.name, block.input)
                    results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)})
                except Exception as e:
                    # Report tool failures back to the model (is_error) so it can recover.
                    self.trace.errors.append(str(e))
                    results.append({"type": "tool_result", "tool_use_id": block.id,
                                    "content": f"Tool error: {e}", "is_error": True})
            if not results:
                # stop_reason was not "end_turn" yet the model emitted no tool
                # calls (e.g. max_tokens truncation). Sending an empty user turn
                # would be an API error, so stop instead of looping.
                logger.warning(f"No tool calls despite stop_reason={response.stop_reason}")
                return "Agent stopped unexpectedly (no tool calls to execute)."
            messages.append({"role": "user", "content": results})
        return "Maximum iterations reached without completing the task."