diff --git a/Gemfile.lock b/Gemfile.lock index 7962075..fa99e92 100755 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -25,6 +25,7 @@ GIT GEM remote: https://rubygems.org/ specs: + Ascii85 (2.0.1) actioncable (7.1.5.2) actionpack (= 7.1.5.2) activesupport (= 7.1.5.2) @@ -126,6 +127,7 @@ GEM jbuilder (~> 2) rails (>= 4.2, < 7.2) selectize-rails (~> 0.6) + afm (1.0.0) ai-agents (0.7.0) ruby_llm (~> 1.8.2) annotaterb (4.20.0) @@ -429,6 +431,7 @@ GEM hana (1.3.7) hash_diff (1.1.1) hashdiff (1.1.0) + hashery (2.1.2) hashie (5.0.0) html2text (0.4.0) nokogiri (>= 1.0, < 2.0) @@ -654,6 +657,12 @@ GEM parser (3.3.8.0) ast (~> 2.4.1) racc + pdf-reader (2.15.1) + Ascii85 (>= 1.0, < 3.0, != 2.0.0) + afm (>= 0.2.1, < 2) + hashery (~> 2.0) + ruby-rc4 + ttfunk pg (1.5.3) pg_search (2.3.6) activerecord (>= 5.2) @@ -813,6 +822,7 @@ GEM faraday (>= 1) faraday-multipart (>= 1) ruby-progressbar (1.13.0) + ruby-rc4 (0.1.5) ruby-saml (1.18.1) nokogiri (>= 1.13.10) rexml @@ -945,6 +955,8 @@ GEM i18n timeout (0.4.3) trailblazer-option (0.1.2) + ttfunk (1.8.0) + bigdecimal (~> 3.1) twilio-ruby (7.6.0) faraday (>= 0.9, < 3.0) jwt (>= 1.5, < 3.0) @@ -1097,6 +1109,7 @@ DEPENDENCIES opensearch-ruby opentelemetry-exporter-otlp opentelemetry-sdk + pdf-reader pg pg_search pgvector @@ -1161,7 +1174,7 @@ DEPENDENCIES working_hours RUBY VERSION - ruby 3.4.4p34 + ruby 3.4.4p34 BUNDLED WITH - 4.0.3 + 2.5.11 diff --git a/enterprise/app/services/captain/llm/pdf_processing_service.rb b/enterprise/app/services/captain/llm/pdf_processing_service.rb index 6aac195..817a19a 100755 --- a/enterprise/app/services/captain/llm/pdf_processing_service.rb +++ b/enterprise/app/services/captain/llm/pdf_processing_service.rb @@ -29,8 +29,9 @@ class Captain::Llm::PdfProcessingService < Llm::LegacyBaseOpenAiService if content.present? Rails.logger.info "PDF extracted content for document #{document.id} (chars=#{content.length})" - # Update content and ensure openai_file_id is nil to force standard FAQ generation - document.update!(content: content, openai_file_id: nil) + # Update content and clear openai_file_id in metadata to force standard FAQ generation. + metadata = (document.metadata || {}).merge('openai_file_id' => nil) + document.update!(content: content, metadata: metadata) else Rails.logger.warn "PDF extracted content is empty for document #{document.id}" end