fix(captain): use metadata for openai_file_id and add pdf-reader gem
This commit is contained in:
parent
a229b0a0f1
commit
4b4feb915d
17
Gemfile.lock
17
Gemfile.lock
@ -25,6 +25,7 @@ GIT
|
|||||||
GEM
|
GEM
|
||||||
remote: https://rubygems.org/
|
remote: https://rubygems.org/
|
||||||
specs:
|
specs:
|
||||||
|
Ascii85 (2.0.1)
|
||||||
actioncable (7.1.5.2)
|
actioncable (7.1.5.2)
|
||||||
actionpack (= 7.1.5.2)
|
actionpack (= 7.1.5.2)
|
||||||
activesupport (= 7.1.5.2)
|
activesupport (= 7.1.5.2)
|
||||||
@ -126,6 +127,7 @@ GEM
|
|||||||
jbuilder (~> 2)
|
jbuilder (~> 2)
|
||||||
rails (>= 4.2, < 7.2)
|
rails (>= 4.2, < 7.2)
|
||||||
selectize-rails (~> 0.6)
|
selectize-rails (~> 0.6)
|
||||||
|
afm (1.0.0)
|
||||||
ai-agents (0.7.0)
|
ai-agents (0.7.0)
|
||||||
ruby_llm (~> 1.8.2)
|
ruby_llm (~> 1.8.2)
|
||||||
annotaterb (4.20.0)
|
annotaterb (4.20.0)
|
||||||
@ -429,6 +431,7 @@ GEM
|
|||||||
hana (1.3.7)
|
hana (1.3.7)
|
||||||
hash_diff (1.1.1)
|
hash_diff (1.1.1)
|
||||||
hashdiff (1.1.0)
|
hashdiff (1.1.0)
|
||||||
|
hashery (2.1.2)
|
||||||
hashie (5.0.0)
|
hashie (5.0.0)
|
||||||
html2text (0.4.0)
|
html2text (0.4.0)
|
||||||
nokogiri (>= 1.0, < 2.0)
|
nokogiri (>= 1.0, < 2.0)
|
||||||
@ -654,6 +657,12 @@ GEM
|
|||||||
parser (3.3.8.0)
|
parser (3.3.8.0)
|
||||||
ast (~> 2.4.1)
|
ast (~> 2.4.1)
|
||||||
racc
|
racc
|
||||||
|
pdf-reader (2.15.1)
|
||||||
|
Ascii85 (>= 1.0, < 3.0, != 2.0.0)
|
||||||
|
afm (>= 0.2.1, < 2)
|
||||||
|
hashery (~> 2.0)
|
||||||
|
ruby-rc4
|
||||||
|
ttfunk
|
||||||
pg (1.5.3)
|
pg (1.5.3)
|
||||||
pg_search (2.3.6)
|
pg_search (2.3.6)
|
||||||
activerecord (>= 5.2)
|
activerecord (>= 5.2)
|
||||||
@ -813,6 +822,7 @@ GEM
|
|||||||
faraday (>= 1)
|
faraday (>= 1)
|
||||||
faraday-multipart (>= 1)
|
faraday-multipart (>= 1)
|
||||||
ruby-progressbar (1.13.0)
|
ruby-progressbar (1.13.0)
|
||||||
|
ruby-rc4 (0.1.5)
|
||||||
ruby-saml (1.18.1)
|
ruby-saml (1.18.1)
|
||||||
nokogiri (>= 1.13.10)
|
nokogiri (>= 1.13.10)
|
||||||
rexml
|
rexml
|
||||||
@ -945,6 +955,8 @@ GEM
|
|||||||
i18n
|
i18n
|
||||||
timeout (0.4.3)
|
timeout (0.4.3)
|
||||||
trailblazer-option (0.1.2)
|
trailblazer-option (0.1.2)
|
||||||
|
ttfunk (1.8.0)
|
||||||
|
bigdecimal (~> 3.1)
|
||||||
twilio-ruby (7.6.0)
|
twilio-ruby (7.6.0)
|
||||||
faraday (>= 0.9, < 3.0)
|
faraday (>= 0.9, < 3.0)
|
||||||
jwt (>= 1.5, < 3.0)
|
jwt (>= 1.5, < 3.0)
|
||||||
@ -1097,6 +1109,7 @@ DEPENDENCIES
|
|||||||
opensearch-ruby
|
opensearch-ruby
|
||||||
opentelemetry-exporter-otlp
|
opentelemetry-exporter-otlp
|
||||||
opentelemetry-sdk
|
opentelemetry-sdk
|
||||||
|
pdf-reader
|
||||||
pg
|
pg
|
||||||
pg_search
|
pg_search
|
||||||
pgvector
|
pgvector
|
||||||
@ -1161,7 +1174,7 @@ DEPENDENCIES
|
|||||||
working_hours
|
working_hours
|
||||||
|
|
||||||
RUBY VERSION
|
RUBY VERSION
|
||||||
ruby 3.4.4p34
|
ruby 3.4.4p34
|
||||||
|
|
||||||
BUNDLED WITH
|
BUNDLED WITH
|
||||||
4.0.3
|
2.5.11
|
||||||
|
|||||||
@ -29,8 +29,9 @@ class Captain::Llm::PdfProcessingService < Llm::LegacyBaseOpenAiService
|
|||||||
|
|
||||||
if content.present?
|
if content.present?
|
||||||
Rails.logger.info "PDF extracted content for document #{document.id} (chars=#{content.length})"
|
Rails.logger.info "PDF extracted content for document #{document.id} (chars=#{content.length})"
|
||||||
# Update content and ensure openai_file_id is nil to force standard FAQ generation
|
# Update content and clear openai_file_id in metadata to force standard FAQ generation.
|
||||||
document.update!(content: content, openai_file_id: nil)
|
metadata = (document.metadata || {}).merge('openai_file_id' => nil)
|
||||||
|
document.update!(content: content, metadata: metadata)
|
||||||
else
|
else
|
||||||
Rails.logger.warn "PDF extracted content is empty for document #{document.id}"
|
Rails.logger.warn "PDF extracted content is empty for document #{document.id}"
|
||||||
end
|
end
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user