# File: enterprise/app/services/captain/contact_memories/extraction_service.rb

# Extracts memorable facts about a contact from a conversation transcript.
#
# The transcript is sent to an LLM that is asked to answer with a JSON object
# of the form {"facts": [...]}. Every returned entry is validated and
# normalized independently; invalid entries are dropped.
#
# The service is best-effort: any failure (LLM error, invalid JSON, unexpected
# payload shape) is logged and results in an empty list.
class Captain::ContactMemories::ExtractionService
  MAX_FACTS = 5
  MIN_CONFIDENCE = 0.5
  # Upper bound for the stored `content`; also advertised to the LLM in the prompt.
  MAX_CONTENT_LENGTH = 1000
  EXTRACTION_MODEL = 'gpt-4o-mini'.freeze
  LOG_TAG = '[ContactMemory::ExtractionService]'.freeze

  # @param conversation [Object] record responding to `#messages` with a
  #   relation that supports `where` and `order` (see #formatted_messages)
  def initialize(conversation:)
    @conversation = conversation
  end

  # Runs the extraction.
  #
  # @return [Array<Hash>] at most MAX_FACTS hashes with the keys
  #   :memory_type, :content, :evidence, :confidence and :scope;
  #   an empty array when nothing could be extracted or on any error
  def call
    # Nothing to analyse: skip the LLM round-trip entirely.
    return [] if transcript.blank?

    facts_from(JSON.parse(call_llm)).filter_map { |fact| normalize(fact) }.take(MAX_FACTS)
  rescue JSON::ParserError => e
    Rails.logger.warn("#{LOG_TAG} JSON parse: #{e.message}")
    []
  rescue StandardError => e
    Rails.logger.error("#{LOG_TAG} #{e.class}: #{e.message}")
    []
  end

  private

  # Sends the prompt to the extraction model and returns the raw response text.
  #
  # @return [String]
  def call_llm
    response = RubyLLM.chat(model: EXTRACTION_MODEL)
                      .with_temperature(0)
                      .with_params(response_format: { type: 'json_object' })
                      .ask(build_prompt)
    response.content.to_s
  end

  # Builds the (Portuguese) extraction prompt. The numeric limits are
  # interpolated from the constants so the prompt and the validation in
  # #normalize / #call cannot drift apart.
  #
  # NOTE(review): the transcript is untrusted user text interpolated verbatim
  # into the prompt, so it can carry prompt-injection attempts. The output is
  # validated in #normalize, but the instructions themselves are not isolated.
  #
  # @return [String]
  def build_prompt
    <<~PROMPT
      Você é um analista que extrai FATOS MEMORÁVEIS de uma conversa de WhatsApp entre um hóspede e um hotel.

      Taxonomia (SÓ use estes tipos, caso contrário descarte o fato):
      #{Captain::ContactMemory::MEMORY_TYPES.join(', ')}

      Para cada fato, retorne JSON com:
      - memory_type (um dos tipos acima)
      - content (frase curta, português, max #{MAX_CONTENT_LENGTH} chars)
      - evidence (trecho LITERAL da conversa que sustenta o fato — obrigatório)
      - confidence (0.0 a 1.0)
      - scope ('global' na maioria dos casos; 'unit:' só se o fato for operacional de uma unidade específica)

      Regras INVIOLÁVEIS:
      1. Se não houver evidência textual clara, NÃO extraia o fato.
      2. Máximo #{MAX_FACTS} fatos por conversa. Extraia só os realmente memoráveis.
      3. Se a conversa não tem nada memorável, retorne {"facts": []}.
      4. Nunca invente fatos. Se em dúvida, descarte.

      Conversa:
      #{transcript}

      Retorne JSON no formato: {"facts": [{...}, ...]}
    PROMPT
  end

  # Memoized transcript so the messages are loaded once per service call.
  #
  # @return [String]
  def transcript
    @transcript ||= formatted_messages
  end

  # Renders the public incoming/outgoing messages, oldest first, one per line
  # as "[message_type] content". Messages without text are skipped because
  # they would only add empty lines to the prompt.
  #
  # @return [String] empty when there is no usable message
  def formatted_messages
    @conversation.messages
                 .where(message_type: [:incoming, :outgoing], private: false)
                 .order(created_at: :asc)
                 .filter_map { |message| "[#{message.message_type}] #{message.content}" if message.content.present? }
                 .join("\n")
  end

  # Pulls the `facts` list out of the parsed LLM payload without assuming its
  # shape: the model may answer with a non-object or a non-array `facts`.
  #
  # @param payload [Object] result of JSON.parse
  # @return [Array] the raw fact entries, or an empty array
  def facts_from(payload)
    facts = payload['facts'] if payload.is_a?(Hash)
    return facts if facts.is_a?(Array)

    Rails.logger.warn("#{LOG_TAG} Unexpected payload shape: #{payload.class}")
    []
  end

  # Validates and normalizes a single raw fact.
  #
  # @param raw_fact [Object] one entry of the `facts` array
  # @return [Hash, nil] the normalized fact, or nil when the entry is not an
  #   object, has an unknown type, lacks content/evidence, or has a confidence
  #   below MIN_CONFIDENCE
  def normalize(raw_fact)
    # A malformed entry must only discard itself, not the whole extraction.
    return nil unless raw_fact.is_a?(Hash)

    type = raw_fact['memory_type'].to_s
    return nil unless Captain::ContactMemory::MEMORY_TYPES.include?(type)

    content = raw_fact['content'].to_s.strip
    evidence = raw_fact['evidence'].to_s.strip
    return nil if content.blank? || evidence.blank?

    # Float(..., exception: false) yields nil for anything non-numeric
    # (nil, arrays, objects, free text), which is treated as zero confidence.
    confidence = Float(raw_fact['confidence'], exception: false) || 0.0
    return nil if confidence < MIN_CONFIDENCE

    {
      memory_type: type,
      content: content.truncate(MAX_CONTENT_LENGTH),
      evidence: evidence,
      confidence: confidence,
      scope: raw_fact['scope'].to_s.presence || 'global'
    }
  end
end

# File: spec/enterprise/services/captain/contact_memories/extraction_service_spec.rb

# rubocop:disable RSpec/AnyInstance
require 'rails_helper'

RSpec.describe Captain::ContactMemories::ExtractionService do
  let(:account) { create(:account) }
  let(:contact) { create(:contact, account: account) }
  let(:conversation) { create(:conversation, account: account, contact: contact) }
  let(:incoming_content) { 'Oi, quero reservar a Stilo com hidro de novo pro meu aniversário dia 14/02' }

  before do
    create(:message, conversation: conversation, message_type: :incoming, content: incoming_content)
    create(:message, conversation: conversation, message_type: :outgoing, content: 'Claro! Confirmando então...')
  end

  describe '#call' do
    let(:llm_response) do
      {
        'facts' => [
          {
            'memory_type' => 'preferencia',
            'content' => 'Prefere Stilo com hidromassagem',
            'evidence' => "disse 'quero a Stilo com hidro de novo'",
            'confidence' => 0.92,
            'scope' => 'global'
          },
          {
            'memory_type' => 'data_comemorativa',
            'content' => 'Aniversário dia 14/02',
            'evidence' => "disse 'meu aniversário dia 14/02'",
            'confidence' => 0.88,
            'scope' => 'global'
          }
        ]
      }.to_json
    end

    before do
      allow_any_instance_of(described_class).to receive(:call_llm).and_return(llm_response)
    end

    it 'returns array of valid facts' do
      result = described_class.new(conversation: conversation).call
      expect(result.size).to eq(2)
      expect(result.first[:memory_type]).to eq('preferencia')
    end

    it 'drops facts with missing evidence' do
      bad_response = { 'facts' => [{ 'memory_type' => 'preferencia', 'content' => 'x', 'evidence' => '', 'confidence' => 0.9 }] }.to_json
      allow_any_instance_of(described_class).to receive(:call_llm).and_return(bad_response)
      expect(described_class.new(conversation: conversation).call).to eq([])
    end

    it 'drops facts with confidence < 0.5' do
      bad_response = { 'facts' => [{ 'memory_type' => 'preferencia', 'content' => 'x', 'evidence' => 'y', 'confidence' => 0.3 }] }.to_json
      allow_any_instance_of(described_class).to receive(:call_llm).and_return(bad_response)
      expect(described_class.new(conversation: conversation).call).to eq([])
    end

    it 'drops facts with non-numeric confidence' do
      bad_response = { 'facts' => [{ 'memory_type' => 'preferencia', 'content' => 'x', 'evidence' => 'y', 'confidence' => ['alta'] }] }.to_json
      allow_any_instance_of(described_class).to receive(:call_llm).and_return(bad_response)
      expect(described_class.new(conversation: conversation).call).to eq([])
    end

    it 'drops facts with invalid type' do
      bad_response = { 'facts' => [{ 'memory_type' => 'invalid', 'content' => 'x', 'evidence' => 'y', 'confidence' => 0.9 }] }.to_json
      allow_any_instance_of(described_class).to receive(:call_llm).and_return(bad_response)
      expect(described_class.new(conversation: conversation).call).to eq([])
    end

    it 'skips malformed entries without discarding valid ones' do
      valid = { 'memory_type' => 'preferencia', 'content' => 'x', 'evidence' => 'y', 'confidence' => 0.9 }
      mixed = { 'facts' => [nil, 42, 'texto', [valid], valid] }.to_json
      allow_any_instance_of(described_class).to receive(:call_llm).and_return(mixed)
      result = described_class.new(conversation: conversation).call
      expect(result.size).to eq(1)
      expect(result.first[:memory_type]).to eq('preferencia')
    end

    it 'returns empty when the payload is not an object with a facts array' do
      allow_any_instance_of(described_class).to receive(:call_llm).and_return({ 'facts' => 'nenhum' }.to_json)
      expect(described_class.new(conversation: conversation).call).to eq([])
    end

    it 'limits to 5 facts even if LLM returns more' do
      many = { 'facts' => Array.new(10) { { 'memory_type' => 'preferencia', 'content' => 'x', 'evidence' => 'y', 'confidence' => 0.9 } } }.to_json
      allow_any_instance_of(described_class).to receive(:call_llm).and_return(many)
      expect(described_class.new(conversation: conversation).call.size).to eq(5)
    end

    it 'returns empty on LLM JSON parse error' do
      allow_any_instance_of(described_class).to receive(:call_llm).and_return('not json')
      expect(described_class.new(conversation: conversation).call).to eq([])
    end

    it 'returns empty on LLM error' do
      allow_any_instance_of(described_class).to receive(:call_llm).and_raise(StandardError)
      expect(described_class.new(conversation: conversation).call).to eq([])
    end
  end

  describe '#call with an empty transcript' do
    it 'returns empty without calling the LLM' do
      empty_conversation = create(:conversation, account: account, contact: contact)
      service = described_class.new(conversation: empty_conversation)
      allow(service).to receive(:call_llm)

      expect(service.call).to eq([])
      expect(service).not_to have_received(:call_llm)
    end
  end
end
# rubocop:enable RSpec/AnyInstance