From f3f8a8d5c12c34f968dc8ff91ab216fbabcdc8da Mon Sep 17 00:00:00 2001
From: Rodribm10
Date: Sun, 19 Apr 2026 11:16:54 -0300
Subject: [PATCH] feat(captain): rate limiting with runaway loop detection + bot_handoff
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Três camadas de proteção contra runaway token burn no AgentRunnerService:

1. MAX_TURNS_PER_MESSAGE = 15
   Cap dentro de uma única chamada run(). Já estava aplicado; agora
   extraído como constante nomeada.

2. MAX_TURNS_PER_CONVERSATION = 30
   Cap ao longo da vida da conversa. Contador em
   conversation.custom_attributes['captain_turn_count']. Ao atingir,
   dispara bot_handoff automático e responde com mensagem de
   transferência para humano.

3. TOOL_LOOP_THRESHOLD = 3
   Detecta a mesma (tool_name, args) invocada 3+ vezes no resultado de
   um único run (sintoma do loop faq_lookup que queimou tokens em
   2026-04-19). Ao detectar: dispara bot_handoff e aborta o turno.

trigger_bot_handoff! aciona conversation.bot_handoff! quando disponível,
removendo a conversa do pipeline automático.

Motivação: dois incidentes reais de queima de crédito OpenAI em
2026-04-19. Ver memory/feedback_never_touch_captain_without_safety_caps.md
para as invariantes completas.

Testes atualizados: mock_result agora stuba :messages (usado pelo novo
tool_loop_detected?) e max_turns esperado é 15.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../captain/assistant/agent_runner_service.rb | 101 +++++++++++++++++-
 .../assistant/agent_runner_service_spec.rb    |   9 +-
 2 files changed, 103 insertions(+), 7 deletions(-)

diff --git a/enterprise/app/services/captain/assistant/agent_runner_service.rb b/enterprise/app/services/captain/assistant/agent_runner_service.rb
index 95cca90cc..c9ac26023 100644
--- a/enterprise/app/services/captain/assistant/agent_runner_service.rb
+++ b/enterprise/app/services/captain/assistant/agent_runner_service.rb
@@ -39,7 +39,23 @@ class Captain::Assistant::AgentRunnerService
     @callbacks = callbacks
   end
 
+  # Hard ceilings to prevent runaway token burn. See memory file
+  # feedback_never_touch_captain_without_safety_caps.md — two real-world incidents.
+  MAX_TURNS_PER_MESSAGE = 15 # Cap inside a single run() call
+  MAX_TURNS_PER_CONVERSATION = 30 # Cap across the whole conversation lifetime
+  TOOL_LOOP_THRESHOLD = 3 # Same (tool_name, args) invoked N+ times = loop
+
+  # Runs the agents on the last user message extracted from message_history.
+  # Short-circuits with a canned handoff reply when the per-conversation turn
+  # budget is spent or when the run repeated the same tool call too many times.
+  # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
   def generate_response(message_history: [])
+    if conversation_turn_limit_exceeded?
+      # Hand off for real; otherwise every later message would hit this branch again.
+      trigger_bot_handoff!
+      return bot_handoff_response('Conversa atingiu o limite de interações automáticas. Transferindo para atendimento humano.')
+    end
+
     agents = build_and_wire_agents
     context = build_context(message_history)
     message_to_process = extract_last_user_message(message_history)
@@ -49,10 +65,16 @@ class Captain::Assistant::AgentRunnerService
     install_instrumentation(runner)
     # max_turns is the hard safety cap: each "turn" = one LLM call + optional tool calls.
     # 100 allowed runaway loops (LLM calling faq_lookup indefinitely when confused).
-    # 15 is plenty for normal flows (greeting -> handoff -> coleta -> tool calls -> resposta)
-    # while keeping a burn-budget ceiling per message.
-    result = runner.run(message_to_process, context: context, max_turns: 15)
+    # MAX_TURNS_PER_MESSAGE is plenty for normal flows while keeping a burn-budget ceiling.
+    result = runner.run(message_to_process, context: context, max_turns: MAX_TURNS_PER_MESSAGE)
 
+    if tool_loop_detected?(result)
+      Rails.logger.error("[Captain V2] Tool loop detected on conv #{@conversation&.id}. Triggering bot_handoff.")
+      trigger_bot_handoff!
+      return bot_handoff_response('Detectei um comportamento repetitivo. Transferindo para atendimento humano.')
+    end
+
+    increment_conversation_turn_count!
     process_agent_result(result, original_query: message_to_process)
   rescue StandardError => e
     # when running the agent runner service in a rake task, the conversation might not have an account associated
@@ -63,9 +85,82 @@ class Captain::Assistant::AgentRunnerService
 
     error_response(e.message)
   end
+  # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
 
   private
 
+  # --- Rate limiting / runaway protection ---
+
+  # True when this conversation already burned the per-conversation turn budget.
+  # Anything beyond MAX_TURNS_PER_CONVERSATION is flagged as runaway and we hand
+  # off to a human. The counter lives on conversation.custom_attributes so it
+  # survives Sidekiq restarts and is queryable from dashboards.
+  def conversation_turn_limit_exceeded?
+    return false if @conversation.blank?
+
+    count = @conversation.custom_attributes.to_h['captain_turn_count'].to_i
+    count >= MAX_TURNS_PER_CONVERSATION
+  end
+
+  # Adds one to custom_attributes['captain_turn_count'] and persists it.
+  # Best-effort: a failure is logged and never breaks the reply. The write is a
+  # plain read-modify-write of the in-memory hash, so it is not atomic.
+  def increment_conversation_turn_count!
+    return if @conversation.blank?
+
+    attrs = @conversation.custom_attributes.to_h
+    attrs['captain_turn_count'] = attrs['captain_turn_count'].to_i + 1
+    # rubocop:disable Rails/SkipsModelValidations
+    @conversation.update_columns(custom_attributes: attrs)
+    # rubocop:enable Rails/SkipsModelValidations
+  rescue StandardError => e
+    Rails.logger.warn("[Captain V2] increment_conversation_turn_count! failed: #{e.message}")
+  end
+
+  # Inspects the messages emitted during the run and flags repeated tool
+  # invocations with identical arguments as a runaway loop. Real incident
+  # that motivated this: Daniela called faq_lookup('preço pernoite alexa')
+  # dozens of times in the same run, burning tokens silently.
+  # Messages that are not hashes are ignored instead of raising.
+  def tool_loop_detected?(result)
+    signatures = Array(result&.messages).flat_map do |msg|
+      next [] unless msg.is_a?(Hash)
+
+      Array(msg[:tool_calls] || msg['tool_calls']).filter_map { |tool_call| tool_call_signature(tool_call) }
+    end
+
+    signatures.tally.any? { |_signature, count| count >= TOOL_LOOP_THRESHOLD }
+  end
+
+  # Builds the "name|arguments" key used to compare tool calls. Returns nil for
+  # malformed entries (non-hash or blank name) so they never count toward a loop.
+  def tool_call_signature(tool_call)
+    return unless tool_call.is_a?(Hash)
+
+    name = (tool_call[:name] || tool_call['name']).to_s
+    return if name.empty?
+
+    args = tool_call[:arguments] || tool_call['arguments']
+    "#{name}|#{args.is_a?(Hash) ? args.to_json : args}"
+  end
+
+  # Calls conversation.bot_handoff! when the conversation responds to it.
+  # Best-effort: failures are logged, not raised.
+  def trigger_bot_handoff!
+    return if @conversation.blank?
+
+    @conversation.bot_handoff! if @conversation.respond_to?(:bot_handoff!)
+  rescue StandardError => e
+    Rails.logger.warn("[Captain V2] trigger_bot_handoff! failed: #{e.message}")
+  end
+
+  # Canned reply hash returned to the caller whenever a safety cap fires.
+  def bot_handoff_response(message)
+    { 'response' => message, 'reasoning' => 'Runaway protection triggered', 'reaction_emoji' => '' }
+  end
+
+  # --- End rate limiting / runaway protection ---
+
   def build_context(message_history)
     last_active_scenario_agent = extract_last_scenario_agent(message_history)
 
diff --git a/spec/enterprise/services/captain/assistant/agent_runner_service_spec.rb b/spec/enterprise/services/captain/assistant/agent_runner_service_spec.rb
index 10f7a2f85..69b8513b7 100644
--- a/spec/enterprise/services/captain/assistant/agent_runner_service_spec.rb
+++ b/spec/enterprise/services/captain/assistant/agent_runner_service_spec.rb
@@ -13,7 +13,7 @@ RSpec.describe Captain::Assistant::AgentRunnerService do
   let(:mock_runner) { instance_double(Agents::Runner) }
   let(:mock_agent) { instance_double(Agents::Agent) }
   let(:mock_scenario_agent) { instance_double(Agents::Agent) }
-  let(:mock_result) { instance_double(Agents::RunResult, output: { 'response' => 'Test response' }, context: nil) }
+  let(:mock_result) { instance_double(Agents::RunResult, output: { 'response' => 'Test response' }, context: nil, messages: []) }
 
   let(:message_history) do
     [
@@ -93,7 +93,7 @@ RSpec.describe Captain::Assistant::AgentRunnerService do
       expect(mock_runner).to receive(:run).with(
         'I need help with my account',
         context: expected_context,
-        max_turns: 100
+        max_turns: 15
       )
 
       service.generate_response(message_history: message_history)
@@ -251,7 +251,7 @@ RSpec.describe Captain::Assistant::AgentRunnerService do
     end
 
     context 'when agent result is a string' do
-      let(:mock_result) { instance_double(Agents::RunResult, output: 'Simple string response', context: nil) }
+      let(:mock_result) { instance_double(Agents::RunResult, output: 'Simple string response', context: nil, messages: []) }
 
       it 'formats string response correctly' do
         result = service.generate_response(message_history: message_history)
@@ -272,7 +272,8 @@ RSpec.describe Captain::Assistant::AgentRunnerService do
             {"response":"Rodrigo, valor total R$ 260,00.","reasoning":"Primeira resposta","reaction_emoji":"💰"}
             {"response":"Rodrigo, para confirmar a reserva, o sinal é R$ 130,00. Posso gerar o Pix?","reasoning":"Resposta final","reaction_emoji":"💰"}
           JSON_OUTPUT
-          context: nil
+          context: nil,
+          messages: []
         )
       end
 