diff --git a/enterprise/app/models/captain/scenario.rb b/enterprise/app/models/captain/scenario.rb index 0f6c48ee0..8fa2375bd 100644 --- a/enterprise/app/models/captain/scenario.rb +++ b/enterprise/app/models/captain/scenario.rb @@ -66,6 +66,16 @@ class Captain::Scenario < ApplicationRecord "#{title} Agent".parameterize(separator: '_') end + # Scenarios can use a different model than the orchestrator (Assistant). + # Rationale: orchestrator does simple routing (cheap model suffices), + # scenarios handle complex flows (tool calling, strict rules) and benefit + # from a stronger model. Falls back to the global CAPTAIN_OPEN_AI_MODEL + # (used by the orchestrator) when SCENARIO-specific override is unset. + def agent_model + scenario_model = InstallationConfig.find_by(name: 'CAPTAIN_OPEN_AI_MODEL_SCENARIO')&.value.presence + scenario_model || super + end + def agent_tools resolved_tools.map { |tool| resolve_tool_instance(tool) } end diff --git a/enterprise/app/services/captain/assistant/memory_prompt_injector.rb b/enterprise/app/services/captain/assistant/memory_prompt_injector.rb index 4ff522ef8..51158f44f 100644 --- a/enterprise/app/services/captain/assistant/memory_prompt_injector.rb +++ b/enterprise/app/services/captain/assistant/memory_prompt_injector.rb @@ -1,4 +1,7 @@ class Captain::Assistant::MemoryPromptInjector + CACHED_MEMORY_KEY = 'captain_cached_memory_block'.freeze + CACHED_CONTACT_KEY = 'captain_cached_memory_contact_id'.freeze + def initialize(conversation:) @conversation = conversation @memory_block_cache = {} @@ -35,6 +38,17 @@ class Captain::Assistant::MemoryPromptInjector private def memory_block_for(message_text) + # Conversation-level cache: once the memory block is computed for this + # conversation (usually on the first message), reuse it for every + # subsequent turn; nothing in this class clears it on resolve. The customer's + # profile is assumed not to change during an open conversation, so re-running + # embedding + pgvector on every turn is pure waste. 
+ cached = conversation_level_cache + return cached if cached.present? + + # In-memory fallback cache (per-service-instance, keyed by message text) for edge cases where + # the conversation-level cache write fails or is skipped (empty block) and we still want to avoid + # re-hitting the API within a single job execution. key = message_text.to_s return @memory_block_cache[key] if @memory_block_cache.key?(key) @@ -44,8 +58,48 @@ class Captain::Assistant::MemoryPromptInjector unit_id: resolve_unit_id ).call - @memory_block_cache[key] = - Captain::ContactMemories::PromptInjectionService.new(memories: memories).call + block = Captain::ContactMemories::PromptInjectionService.new(memories: memories).call + @memory_block_cache[key] = block + persist_conversation_level_cache(block) + block + end + + # Reads the pre-computed memory block stashed on the conversation. + # Returns nil when missing, empty, stale (stored contact id differs) or unreadable (error is logged); + # a block with no stored contact id is accepted as-is. On nil, memory_block_for runs a fresh recall. + def conversation_level_cache + return nil if @conversation.blank? + + raw = @conversation.custom_attributes.to_h[CACHED_MEMORY_KEY] + return nil if raw.blank? + + cached_contact_id = @conversation.custom_attributes.to_h[CACHED_CONTACT_KEY] + return nil if cached_contact_id.present? && cached_contact_id.to_i != @conversation.contact_id.to_i + + raw.to_s + rescue StandardError => e + Rails.logger.warn("[Captain V2] MemoryPromptInjector read cache failed: #{e.message}") + nil + end + + # Stores the computed block on the conversation so future turns reuse it. + # Stored as a custom_attribute to avoid a new column. The resolve-conversation + # listener in Phase 4 already fires ExtractFromConversationJob — a future + # enhancement can clear this cache on resolve, but letting it live is + # harmless (next conversation is a new record with empty custom_attributes). + def persist_conversation_level_cache(block) + return if @conversation.blank? || block.to_s.empty? 
+ + attrs = @conversation.custom_attributes.to_h + attrs[CACHED_MEMORY_KEY] = block + attrs[CACHED_CONTACT_KEY] = @conversation.contact_id + # rubocop:disable Rails/SkipsModelValidations + # update_columns deliberately — this write runs after every fresh recall that yields a non-empty block + # and must not trigger callbacks (which could re-enqueue heavy jobs). + @conversation.update_columns(custom_attributes: attrs) + # rubocop:enable Rails/SkipsModelValidations + rescue StandardError => e + Rails.logger.warn("[Captain V2] MemoryPromptInjector write cache failed: #{e.message}") end def resolve_unit_id