fix(captain): use metadata for openai_file_id and add pdf-reader gem

This commit is contained in:
Rodrigo Borba 2026-01-05 08:07:48 -03:00
parent a229b0a0f1
commit 4b4feb915d
2 changed files with 18 additions and 4 deletions

View File

@@ -25,6 +25,7 @@ GIT
GEM GEM
remote: https://rubygems.org/ remote: https://rubygems.org/
specs: specs:
Ascii85 (2.0.1)
actioncable (7.1.5.2) actioncable (7.1.5.2)
actionpack (= 7.1.5.2) actionpack (= 7.1.5.2)
activesupport (= 7.1.5.2) activesupport (= 7.1.5.2)
@@ -126,6 +127,7 @@ GEM
jbuilder (~> 2) jbuilder (~> 2)
rails (>= 4.2, < 7.2) rails (>= 4.2, < 7.2)
selectize-rails (~> 0.6) selectize-rails (~> 0.6)
afm (1.0.0)
ai-agents (0.7.0) ai-agents (0.7.0)
ruby_llm (~> 1.8.2) ruby_llm (~> 1.8.2)
annotaterb (4.20.0) annotaterb (4.20.0)
@@ -429,6 +431,7 @@ GEM
hana (1.3.7) hana (1.3.7)
hash_diff (1.1.1) hash_diff (1.1.1)
hashdiff (1.1.0) hashdiff (1.1.0)
hashery (2.1.2)
hashie (5.0.0) hashie (5.0.0)
html2text (0.4.0) html2text (0.4.0)
nokogiri (>= 1.0, < 2.0) nokogiri (>= 1.0, < 2.0)
@@ -654,6 +657,12 @@ GEM
parser (3.3.8.0) parser (3.3.8.0)
ast (~> 2.4.1) ast (~> 2.4.1)
racc racc
pdf-reader (2.15.1)
Ascii85 (>= 1.0, < 3.0, != 2.0.0)
afm (>= 0.2.1, < 2)
hashery (~> 2.0)
ruby-rc4
ttfunk
pg (1.5.3) pg (1.5.3)
pg_search (2.3.6) pg_search (2.3.6)
activerecord (>= 5.2) activerecord (>= 5.2)
@@ -813,6 +822,7 @@ GEM
faraday (>= 1) faraday (>= 1)
faraday-multipart (>= 1) faraday-multipart (>= 1)
ruby-progressbar (1.13.0) ruby-progressbar (1.13.0)
ruby-rc4 (0.1.5)
ruby-saml (1.18.1) ruby-saml (1.18.1)
nokogiri (>= 1.13.10) nokogiri (>= 1.13.10)
rexml rexml
@@ -945,6 +955,8 @@ GEM
i18n i18n
timeout (0.4.3) timeout (0.4.3)
trailblazer-option (0.1.2) trailblazer-option (0.1.2)
ttfunk (1.8.0)
bigdecimal (~> 3.1)
twilio-ruby (7.6.0) twilio-ruby (7.6.0)
faraday (>= 0.9, < 3.0) faraday (>= 0.9, < 3.0)
jwt (>= 1.5, < 3.0) jwt (>= 1.5, < 3.0)
@@ -1097,6 +1109,7 @@ DEPENDENCIES
opensearch-ruby opensearch-ruby
opentelemetry-exporter-otlp opentelemetry-exporter-otlp
opentelemetry-sdk opentelemetry-sdk
pdf-reader
pg pg
pg_search pg_search
pgvector pgvector
@@ -1161,7 +1174,7 @@ DEPENDENCIES
working_hours working_hours
RUBY VERSION RUBY VERSION
ruby 3.4.4p34 ruby 3.4.4p34
BUNDLED WITH BUNDLED WITH
4.0.3 2.5.11

View File

@@ -29,8 +29,9 @@ class Captain::Llm::PdfProcessingService < Llm::LegacyBaseOpenAiService
if content.present? if content.present?
Rails.logger.info "PDF extracted content for document #{document.id} (chars=#{content.length})" Rails.logger.info "PDF extracted content for document #{document.id} (chars=#{content.length})"
# Update content and ensure openai_file_id is nil to force standard FAQ generation # Update content and clear openai_file_id in metadata to force standard FAQ generation.
document.update!(content: content, openai_file_id: nil) metadata = (document.metadata || {}).merge('openai_file_id' => nil)
document.update!(content: content, metadata: metadata)
else else
Rails.logger.warn "PDF extracted content is empty for document #{document.id}" Rails.logger.warn "PDF extracted content is empty for document #{document.id}"
end end