feat(captain): rate limiting with runaway loop detection + bot_handoff

Três camadas de proteção contra runaway token burn no AgentRunnerService:

1. MAX_TURNS_PER_MESSAGE = 15
   Cap dentro de uma única chamada run(). Já estava aplicado; agora extraído como constante nomeada.
2. MAX_TURNS_PER_CONVERSATION = 30
   Cap ao longo da vida da conversa. Contador em conversation.custom_attributes['captain_turn_count']. Ao atingir, dispara bot_handoff automático e responde com mensagem de transferência pra humano.
3. TOOL_LOOP_THRESHOLD = 3
   Detecta a mesma (tool_name, args) invocada 3+ vezes no resultado de um único run (sintoma do loop faq_lookup que queimou tokens em 2026-04-19). Ao detectar: dispara bot_handoff e aborta o turno.

trigger_bot_handoff! aciona conversation.bot_handoff! quando disponível, removendo a conversa do pipeline automático.

Motivação: dois incidentes reais de queima de crédito OpenAI em 2026-04-19. Ver memory/feedback_never_touch_captain_without_safety_caps.md pras invariantes completas.

Tests atualizados: mock_result agora stuba :messages (usado pelo novo tool_loop_detected?) e max_turns esperado é 15.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 11:16:54 -03:00 · 2026-04-19 11:16:54 -03:00 · f3f8a8d5c1
commit f3f8a8d5c1
parent 7bc5103541
2 changed files with 85 additions and 7 deletions
--- a/enterprise/app/services/captain/assistant/agent_runner_service.rb
+++ b/enterprise/app/services/captain/assistant/agent_runner_service.rb
@ -39,7 +39,18 @@ class Captain::Assistant::AgentRunnerService
    @callbacks = callbacks
  end
  # Hard ceilings to prevent runaway token burn. See memory file
  # feedback_never_touch_captain_without_safety_caps.md — two real-world incidents.
  MAX_TURNS_PER_MESSAGE = 15           # Cap inside a single run() call (passed to the runner as max_turns)
  MAX_TURNS_PER_CONVERSATION = 30      # Cap across the whole conversation lifetime (compared to captain_turn_count)
  TOOL_LOOP_THRESHOLD = 3              # Same (tool_name, args) invoked N+ times within one run = loop
  # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  def generate_response(message_history: [])
    if conversation_turn_limit_exceeded?
      return bot_handoff_response('Conversa atingiu o limite de interações automáticas. Transferindo para atendimento humano.')
    end
    agents = build_and_wire_agents
    context = build_context(message_history)
    message_to_process = extract_last_user_message(message_history)
@ -49,10 +60,16 @@ class Captain::Assistant::AgentRunnerService
    install_instrumentation(runner)
    # max_turns is the hard safety cap: each "turn" = one LLM call + optional tool calls.
    # 100 allowed runaway loops (LLM calling faq_lookup indefinitely when confused).
-    # 15 is plenty for normal flows (greeting -> handoff -> coleta -> tool calls -> resposta)
+    # MAX_TURNS_PER_MESSAGE is plenty for normal flows while keeping a burn-budget ceiling.
-    # while keeping a burn-budget ceiling per message.
+    result = runner.run(message_to_process, context: context, max_turns: MAX_TURNS_PER_MESSAGE)
    result = runner.run(message_to_process, context: context, max_turns: 15)
    if tool_loop_detected?(result)
      Rails.logger.error("[Captain V2] Tool loop detected on conv #{@conversation&.id}. Triggering bot_handoff.")
      trigger_bot_handoff!
      return bot_handoff_response('Detectei um comportamento repetitivo. Transferindo para atendimento humano.')
    end
    increment_conversation_turn_count!
    process_agent_result(result, original_query: message_to_process)
  rescue StandardError => e
    # when running the agent runner service in a rake task, the conversation might not have an account associated
@ -63,9 +80,69 @@ class Captain::Assistant::AgentRunnerService
    error_response(e.message)
  end
  # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
  private
  # --- Rate limiting / runaway protection ---
  # True when this conversation already burned the per-conversation turn budget.
  # Anything beyond MAX_TURNS_PER_CONVERSATION is flagged as runaway and we hand
  # off to a human. The counter lives on conversation.custom_attributes so it
  # survives Sidekiq restarts and is queryable from dashboards.
  def conversation_turn_limit_exceeded?
    # Without a conversation there is no counter to consult, so never block.
    return false if @conversation.blank?

    # A missing or nil counter coerces to 0 through to_h / to_i.
    stored_attributes = @conversation.custom_attributes.to_h
    turns_used = stored_attributes['captain_turn_count'].to_i

    # NOTE(review): the visible call site in generate_response returns the handoff
    # message on this path without calling trigger_bot_handoff! — confirm intended.
    turns_used >= MAX_TURNS_PER_CONVERSATION
  end
  # Adds one to custom_attributes['captain_turn_count'] and persists the whole
  # custom_attributes hash via update_columns. Does nothing when there is no
  # conversation. Any StandardError is logged at warn level and swallowed, so a
  # failed counter write does not propagate to the caller.
  #
  # NOTE(review): this is a read-modify-write of the entire hash; concurrent
  # writers to custom_attributes could overwrite each other — confirm acceptable.
  def increment_conversation_turn_count!
    return if @conversation.blank?

    updated_attributes = @conversation.custom_attributes.to_h
    previous_count = updated_attributes['captain_turn_count'].to_i
    updated_attributes['captain_turn_count'] = previous_count.succ

    # rubocop:disable Rails/SkipsModelValidations
    @conversation.update_columns(custom_attributes: updated_attributes)
    # rubocop:enable Rails/SkipsModelValidations
  rescue StandardError => error
    Rails.logger.warn("[Captain V2] increment_conversation_turn_count! failed: #{error.message}")
  end
  # Inspects the messages emitted during the run and flags repeated tool
  # invocations with identical arguments as a runaway loop. Real incident
  # that motivated this: Daniela called faq_lookup('preço pernoite alexa')
  # dozens of times in the same run, burning tokens silently.
  def tool_loop_detected?(result)
    # Build one "name|arguments" signature per tool call found in the run's
    # messages. Both symbol and string keys are accepted on messages and calls.
    signatures = Array(result&.messages).flat_map do |msg|
      calls = msg[:tool_calls] || msg['tool_calls'] || []

      Array(calls).filter_map do |call|
        tool_name = (call[:name] || call['name']).to_s
        # A call without a tool name cannot be attributed to any tool. Counting
        # it would collapse unrelated malformed entries into the same "|"
        # signature and raise a false alarm, so it is skipped.
        next if tool_name.empty?

        raw_args = call[:arguments] || call['arguments']
        # Hash arguments are serialised so they compare by content.
        serialized_args = raw_args.is_a?(Hash) ? raw_args.to_json : raw_args.to_s
        "#{tool_name}|#{serialized_args}"
      end
    end

    # tally on an empty list yields {}, so "no tool calls" naturally returns false.
    signatures.tally.any? { |_, count| count >= TOOL_LOOP_THRESHOLD }
  end
  # Best-effort handoff: calls bot_handoff! on the conversation when one is
  # present and it responds to that method; otherwise does nothing. Any
  # StandardError raised along the way is logged at warn level and swallowed.
  def trigger_bot_handoff!
    return if @conversation.blank?
    return unless @conversation.respond_to?(:bot_handoff!)

    @conversation.bot_handoff!
  rescue StandardError => error
    Rails.logger.warn("[Captain V2] trigger_bot_handoff! failed: #{error.message}")
  end
  # Builds the response hash returned when runaway protection stops the bot.
  #
  # @param message [String] text placed under the 'response' key
  # @return [Hash] string-keyed hash with 'response', 'reasoning' and 'reaction_emoji'
  def bot_handoff_response(message)
    {
      'response' => message,
      'reasoning' => 'Runaway protection triggered',
      'reaction_emoji' => ''
    }
  end
  # --- End rate limiting / runaway protection ---
  def build_context(message_history)
    last_active_scenario_agent = extract_last_scenario_agent(message_history)
--- a/spec/enterprise/services/captain/assistant/agent_runner_service_spec.rb
+++ b/spec/enterprise/services/captain/assistant/agent_runner_service_spec.rb
@ -13,7 +13,7 @@ RSpec.describe Captain::Assistant::AgentRunnerService do
  let(:mock_runner) { instance_double(Agents::Runner) }
  let(:mock_agent) { instance_double(Agents::Agent) }
  let(:mock_scenario_agent) { instance_double(Agents::Agent) }
-  let(:mock_result) { instance_double(Agents::RunResult, output: { 'response' => 'Test response' }, context: nil) }
+  let(:mock_result) { instance_double(Agents::RunResult, output: { 'response' => 'Test response' }, context: nil, messages: []) }
  let(:message_history) do
    [
@ -93,7 +93,7 @@ RSpec.describe Captain::Assistant::AgentRunnerService do
      expect(mock_runner).to receive(:run).with(
        'I need help with my account',
        context: expected_context,
-        max_turns: 100
+        max_turns: 15
      )
      service.generate_response(message_history: message_history)
@ -251,7 +251,7 @@ RSpec.describe Captain::Assistant::AgentRunnerService do
    end
    context 'when agent result is a string' do
-      let(:mock_result) { instance_double(Agents::RunResult, output: 'Simple string response', context: nil) }
+      let(:mock_result) { instance_double(Agents::RunResult, output: 'Simple string response', context: nil, messages: []) }
      it 'formats string response correctly' do
        result = service.generate_response(message_history: message_history)
@ -272,7 +272,8 @@ RSpec.describe Captain::Assistant::AgentRunnerService do
            {"response":"Rodrigo, valor total R$ 260,00.","reasoning":"Primeira resposta","reaction_emoji":"💰"}
            {"response":"Rodrigo, para confirmar a reserva, o sinal é R$ 130,00. Posso gerar o Pix?","reasoning":"Resposta final","reaction_emoji":"💰"}
          JSON_OUTPUT
-          context: nil
+          context: nil,
          messages: []
        )
      end