From b457e84c2feb8d2be572ed1082f47f72434140ff Mon Sep 17 00:00:00 2001 From: Rodribm10 Date: Wed, 22 Apr 2026 17:42:31 -0300 Subject: [PATCH] fix(captain): route embeddings to legacy OpenAI + retry transient errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve duas camadas de problema identificadas em teste end-to-end: 1. Embeddings falhavam com HTTP 404 (/codex/v1/embeddings não existe). Solução: Captain::Llm::EmbeddingService sempre usa OpenAI tradicional via Llm::Config.with_api_key(legacy_settings). ProviderConfig expõe legacy_openai_settings pra isso. 2. Servidor Codex ocasionalmente responde com response.failed + code=server_error (instabilidade transitória). Client agora retenta até 2x com backoff exponencial (0.5s, 1.5s) em erros retryable: HTTP 5xx, server_error no response.failed, ou stream inacabado. Outras correções nesta etapa: - Scenario#agent_model: em modo Codex, ignora CAPTAIN_OPEN_AI_MODEL_SCENARIO (que pode ter gpt-4o legado) e usa ProviderConfig.model. - ExtractionService/ContradictionCheckerService/TranslateQueryService: trocam constantes hardcoded gpt-4o-mini/gpt-4.1-nano por ProviderConfig.light_model (respeitando o provider ativo). - ProviderConfig.DEFAULT_CODEX_MODEL agora é gpt-5.2 (reconhecido pelo RubyLLM; gpt-5.4 não está no catalog do gem). Validado ponta-a-ponta: WhatsApp → Chatwoot → Jasmine → handoff Daniela → faq_lookup com embedding OK → resposta com preços corretos. Docs em docs/captain-codex-oauth.md. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- app/models/contact.rb | 48 ++-- db/schema.rb | 2 +- db/seed_prompts/jasmine_orchestrator.md | 0 docs/captain-codex-oauth.md | 215 ++++++++++++++++++ docs/chatwoot-staging-deploy.md | 161 +++++++++++++ .../app/models/captain/codex_credential.rb | 21 ++ enterprise/app/models/captain/scenario.rb | 4 + .../app/services/captain/codex/client.rb | 36 ++- .../contradiction_checker_service.rb | 7 +- .../contact_memories/extraction_service.rb | 7 +- .../services/captain/llm/embedding_service.rb | 13 +- .../services/captain/llm/provider_config.rb | 35 ++- .../captain/llm/translate_query_service.rb | 6 +- .../captain/llm/provider_config_spec.rb | 4 +- 14 files changed, 524 insertions(+), 35 deletions(-) create mode 100644 db/seed_prompts/jasmine_orchestrator.md create mode 100644 docs/captain-codex-oauth.md create mode 100644 docs/chatwoot-staging-deploy.md diff --git a/app/models/contact.rb b/app/models/contact.rb index 3badf478d..28f7af2d9 100644 --- a/app/models/contact.rb +++ b/app/models/contact.rb @@ -4,32 +4,44 @@ # # Table name: contacts # -# id :integer not null, primary key -# additional_attributes :jsonb -# blocked :boolean default(FALSE), not null -# contact_type :integer default("visitor") -# country_code :string default("") -# custom_attributes :jsonb -# email :string -# identifier :string -# last_activity_at :datetime -# last_name :string default("") -# location :string default("") -# middle_name :string default("") -# name :string default("") -# phone_number :string -# created_at :datetime not null -# updated_at :datetime not null -# account_id :integer not null -# company_id :bigint +# id :integer not null, primary key +# additional_attributes :jsonb +# blocked :boolean default(FALSE), not null +# contact_type :integer default("visitor") +# country_code :string default("") +# custom_attributes :jsonb +# days_since_last_interaction :integer +# email :string +# first_interaction_at :datetime +# identifier :string 
+# interactions_count :integer default(0), not null +# is_recurring :boolean default(FALSE), not null +# last_activity_at :datetime +# last_interaction_at :datetime +# last_name :string default("") +# location :string default("") +# middle_name :string default("") +# name :string default("") +# one_shot_consultations_count :integer default(0), not null +# phone_number :string +# pix_generated_count :integer default(0), not null +# reservations_paid_count :integer default(0), not null +# created_at :datetime not null +# updated_at :datetime not null +# account_id :integer not null +# company_id :bigint # # Indexes # +# idx_contacts_account_recurring_last (account_id,is_recurring,last_interaction_at) # index_contacts_on_account_id (account_id) # index_contacts_on_account_id_and_contact_type (account_id,contact_type) # index_contacts_on_account_id_and_last_activity_at (account_id,last_activity_at DESC NULLS LAST) # index_contacts_on_blocked (blocked) # index_contacts_on_company_id (company_id) +# index_contacts_on_days_since_last_interaction (days_since_last_interaction) +# index_contacts_on_is_recurring (is_recurring) +# index_contacts_on_last_interaction_at (last_interaction_at) # index_contacts_on_lower_email_account_id (lower((email)::text), account_id) # index_contacts_on_name_email_phone_number_identifier (name,email,phone_number,identifier) USING gin # index_contacts_on_nonempty_fields (account_id,email,phone_number,identifier) WHERE (((email)::text <> ''::text) OR ((phone_number)::text <> ''::text) OR ((identifier)::text <> ''::text)) diff --git a/db/schema.rb b/db/schema.rb index f461d0076..70d309314 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -1173,7 +1173,7 @@ ActiveRecord::Schema[7.1].define(version: 2026_04_22_145733) do t.string "evolution_api_token_iv" t.jsonb "provider_connection", default: {} t.index ["phone_number"], name: "index_channel_whatsapp_on_phone_number", unique: true - t.index ["provider_connection"], name: 
"index_channel_whatsapp_provider_connection", where: "((provider)::text = ANY (ARRAY[('baileys'::character varying)::text, ('zapi'::character varying)::text]))", using: :gin + t.index ["provider_connection"], name: "index_channel_whatsapp_provider_connection", where: "((provider)::text = ANY ((ARRAY['baileys'::character varying, 'zapi'::character varying])::text[]))", using: :gin end create_table "companies", force: :cascade do |t| diff --git a/db/seed_prompts/jasmine_orchestrator.md b/db/seed_prompts/jasmine_orchestrator.md new file mode 100644 index 000000000..e69de29bb diff --git a/docs/captain-codex-oauth.md b/docs/captain-codex-oauth.md new file mode 100644 index 000000000..28f436189 --- /dev/null +++ b/docs/captain-codex-oauth.md @@ -0,0 +1,215 @@ +# Captain AI via OAuth ChatGPT Plus (Codex) + +Documentação do caminho ponta-a-ponta pra fazer o Captain AI rodar usando a +**assinatura ChatGPT Plus** em vez de API key OpenAI paga por token. + +Status: **funcional em dev** (2026-04-22). Pendente: rollout em staging/prod. + +--- + +## Arquitetura + +``` +Captain (RubyLLM / Agents gem / ruby-openai) + │ + │ POST /v1/chat/completions (formato OpenAI Chat Completions) + ▼ +┌──────────────────────────────────────────────────────┐ +│ Api::Internal::CodexProxyController │ +│ • traduz chat→responses │ +│ • Captain::Codex::AuthService.valid_access_token │ (OAuth refresh automático) +│ • streaming SSE → agregado │ +│ • retry em erros transitórios │ +│ • traduz responses→chat │ +└──────────────────────────────────────────────────────┘ + │ + │ POST https://chatgpt.com/backend-api/codex/responses + │ Authorization: Bearer + ▼ + OpenAI Codex (consome assinatura ChatGPT Plus, sem cobrar por token) +``` + +**Embeddings NÃO passam pelo proxy.** O endpoint Codex não expõe `/embeddings`, +então `Captain::Llm::EmbeddingService` força o uso da OpenAI API tradicional +(requer `CAPTAIN_OPEN_AI_API_KEY` válida mesmo em modo Codex). 
+ +**Files API NÃO passa pelo proxy.** Mesmo motivo — `Llm::LegacyBaseOpenAiService` +(usado em `PdfProcessingService` e `PaginatedFaqGeneratorService`) continua +apontando pra OpenAI tradicional. + +--- + +## Componentes + +| Componente | Papel | +|------------|-------| +| `Captain::CodexCredential` (model) | Tabela singleton com access_token + refresh_token (AR encrypted) | +| `Captain::Codex::AuthService` | Device flow OAuth + refresh automático | +| `Captain::Codex::Client` | HTTP client streaming SSE, com retry em server_error | +| `Captain::Codex::Translator` | Chat Completions ↔ Responses API (bidirectional) | +| `Api::Internal::CodexProxyController` | `POST /codex/v1/chat/completions` | +| `Captain::Llm::ProviderConfig` | Single source of truth de provider/model/api_base | +| `Captain::Codex::RefreshTokensJob` | Sidekiq cron: refresh proativo de tokens (30min) | +| `rake captain:codex:{login,status,refresh}` | Utilitários de ops | + +--- + +## Setup em dev + +### 1. Migration + +```bash +bundle exec rails db:migrate +``` + +### 2. Login OAuth (device flow) + +```bash +bundle exec rails captain:codex:login +``` + +Abre URL no browser → loga com conta ChatGPT Plus → cola código → tokens +salvos em `captain_codex_credentials`. + +### 3. Ativar o provider + configurar modelo + +```ruby +# via rails runner ou bundle exec rails c +InstallationConfig.find_or_initialize_by(name: 'CAPTAIN_LLM_PROVIDER').update!( + value: 'openai_codex_oauth', locked: false +) +InstallationConfig.find_or_initialize_by(name: 'CAPTAIN_CODEX_PROXY_URL').update!( + value: 'http://localhost:3000/codex', locked: false +) +InstallationConfig.find_by!(name: 'CAPTAIN_OPEN_AI_MODEL').update!(value: 'gpt-5.2') +InstallationConfig.find_by!(name: 'CAPTAIN_OPEN_AI_API_KEY').update!(value: 'sk-') +``` + +**Importante sobre CAPTAIN_OPEN_AI_API_KEY:** mesmo em modo Codex OAuth, a key +precisa ser válida — é usada apenas pra embeddings (`/embeddings` não existe no +Codex) e file uploads. 
Sem essa key, `faq_lookup` e memory recall falham. + +### 4. Reinicia Rails + +```bash +pkill -9 -f 'overmind|vite|sidekiq|rails' 2>/dev/null; sleep 3 +rm -f ./.overmind.sock && pnpm run dev +``` + +### 5. Teste direto no proxy + +```bash +curl -X POST http://localhost:3000/codex/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{"model":"gpt-5.2","messages":[{"role":"user","content":"Diga: OK"}]}' +``` + +Espera: JSON no formato OpenAI Chat Completions com `choices[0].message.content`. + +--- + +## Modelos suportados + +O endpoint Codex via ChatGPT Plus aceita os modelos da família GPT-5 do Hermes: + +| Modelo | Uso | RubyLLM reconhece? | +|--------|-----|--------------------| +| `gpt-5.2` | **Default atual** — conversação | ✓ | +| `gpt-5.1` | Fallback conversacional | ✓ | +| `gpt-5-codex`, `gpt-5.1-codex`, `gpt-5.1-codex-max`, `gpt-5.1-codex-mini` | Code-focused | ✓ | +| `gpt-5.4`, `gpt-5.3-codex` | Mais novos, melhor qualidade | ✗ (não no catalog do gem) | +| `gpt-4o`, `gpt-4o-mini` | **NÃO funciona** no endpoint Codex | — | + +Pra usar `gpt-5.4`/`gpt-5.3-codex` no futuro: adicionar sobrescrita no proxy +que mapeia modelo recebido → modelo enviado ao Codex (evita validação do RubyLLM). 
+ +--- + +## Peculiaridades da Responses API (vs Chat Completions) + +O Translator lida com as seguintes diferenças: + +| Campo | Chat Completions | Responses | +|-------|------------------|-----------| +| Path | `/chat/completions` | `/responses` | +| Mensagens | `messages: []` | `input: []` | +| System prompt | `{role: "system", content: "..."}` | `instructions: "..."` (top-level, obrigatório) | +| Tools wrapper | `{type: "function", function: {name, description, parameters}}` | `{type: "function", name, description, parameters, strict}` | +| Tool result | `{role: "tool", tool_call_id, content}` | `{type: "function_call_output", call_id, output}` | +| Assistant tool_call | `{role: "assistant", tool_calls: [...]}` | `{type: "function_call", call_id, name, arguments}` | +| Streaming | Opcional (`stream: true`) | **Obrigatório** | +| `temperature`/`top_p` | Aceitos | **Rejeitados** (modelos reasoning) | +| `max_tokens` | `max_tokens` | `max_output_tokens` | +| Output final | `choices[].message` | `output: [items]` via SSE events | +| Storage | Default persiste | `store: false` **obrigatório** | + +--- + +## Troubleshooting + +### Erro: `"Stream must be set to true"` +Request enviou `stream: false`. O Translator força `stream: true` — verifique +se não há caminho que bypassa o Translator. + +### Erro: `"The '' model is not supported when using Codex with a ChatGPT account."` +Algum service está com modelo hardcoded inaceitável (gpt-4o, gpt-4o-mini). +Verifique `CAPTAIN_OPEN_AI_MODEL` e `CAPTAIN_OPEN_AI_MODEL_SCENARIO`. + +### Erro: `"RubyLLM::ModelNotFoundError: Unknown model: "` +O modelo não está no catalog do RubyLLM. Use `gpt-5.2` ou `gpt-5.1` (lista atual +em `RubyLLM.models.all.map(&:id)`). + +### Erro: `"Incorrect API key provided"` em embedding +`CAPTAIN_OPEN_AI_API_KEY` inválida. Embeddings sempre usam OpenAI tradicional, +mesmo em Codex OAuth. 
+ +### Erro: `"response.failed" com code=server_error` +Instabilidade do endpoint Codex ou rate limit da assinatura Plus. O Client +já retenta 2x com backoff (0.5s, 1.5s). Se persistir, pode ser sinal de que +precisa subir de plano (Team/Pro). + +### Voltar pra API tradicional (rollback rápido) + +```ruby +InstallationConfig.find_by!(name: 'CAPTAIN_LLM_PROVIDER').update!(value: 'openai_api') +InstallationConfig.find_by!(name: 'CAPTAIN_OPEN_AI_MODEL').update!(value: 'gpt-4o-mini') +``` + +Depois restart. + +--- + +## Ordem de commits (historical) + +Branch: `feat/captain-codex-oauth` + +1. `chore(captain): PoC Codex OAuth device flow + Responses streaming` + — PoC standalone em Ruby puro (scripts/captain_codex_poc/) + +2. `feat(captain): Codex OAuth auth module + proxy controller` + — Migration, AuthService, Translator, Client, Controller + +3. `fix(captain): always include instructions in Codex responses body` + — Codex exige `instructions` mesmo quando não tem system message + +4. `feat(captain): feature flag CAPTAIN_LLM_PROVIDER + ProviderConfig central` + — Toggle openai_api vs openai_codex_oauth + +5. `fix(captain): route embeddings to legacy OpenAI + retry transient errors` + — Embeddings via OpenAI tradicional + retry automático no Client + +--- + +## Riscos conhecidos + +- **ToS**: uso comercial da assinatura ChatGPT Plus via OAuth não-oficial viola + os termos da OpenAI. OpenAI pode cortar a conta sem aviso, derrubando todos + os hotéis ao mesmo tempo. +- **Rate limits não documentados**: ChatGPT Plus tem limites de mensagens/hora + que não são públicos. Pode bater limite em horário de pico. +- **Client_id do Hermes**: reusamos `app_EMoamEEZ73f0CkXaXp7hrann`. Se o Hermes + regerar o app ou a OpenAI bloquear por terceiros, quebra. +- **Modelos Codex**: otimizados pra código. Qualidade conversacional pode ser + inferior ao gpt-4o em alguns cenários. +- **Fallback não automático**: se o Codex falhar persistentemente, alternância + pra `openai_api` é manual. 
Rollout em prod deve considerar automação. diff --git a/docs/chatwoot-staging-deploy.md b/docs/chatwoot-staging-deploy.md new file mode 100644 index 000000000..b4a756394 --- /dev/null +++ b/docs/chatwoot-staging-deploy.md @@ -0,0 +1,161 @@ +# Chatwoot — Deploy de branch em staging paralela + +Runbook pra subir qualquer branch do fork `iachat` como stack Swarm paralela +isolada da produção, testar, e só depois fazer merge pra main. + +> **Automação:** existe uma skill do Claude Code que executa esse runbook +> passo a passo. Invoque com "subir branch X em staging" no chat. Arquivos: +> `~/.claude/skills/chatwoot-staging-deploy/`. Este doc é o backup versionado +> pra quando não for usar a skill. + +## Arquitetura atual + +- **Repo:** `github.com/rodribm10/iachat` (fork do Chatwoot) +- **VPS:** `root@76.13.174.155` (Leo), Docker Swarm + Traefik v2.11 + Let's Encrypt +- **Prod:** stack `iachat` em `iachat.hoteis1001noites.com.br`, imagem `ghcr.io/rodribm10/iachat:vN` +- **Workflow CI:** `.github/workflows/deploy_ghcr.yml` → publica `:latest` + `:v` a cada push +- **Credenciais da VPS:** em `docs/acessos_vps.md` (gitignored — NÃO commitar) + +## Fluxo em 9 fases + +### 1. Preparar commit local + +```bash +# Trava credenciais fora do commit +git check-ignore docs/acessos_vps.md || (echo "PERIGO: credenciais não ignoradas"; exit 1) + +git add -A +git diff --cached --name-only | grep -iE "acessos|vps" && { echo "FALHA: credenciais stageadas"; exit 1; } +``` + +Commit com mensagem estruturada (feat/fix + descrição). + +**Pre-commit hooks**: +- ESLint exige i18n — adicionar keys em `app/javascript/dashboard/i18n/locale/{pt_BR,en}/captain.json` +- Rubocop metric violations: `# rubocop:disable Metrics/MethodLength,Metrics/AbcSize` antes da `class` + +**Nunca usar `--no-verify`**. + +### 2. 
Push + aguardar CI + +```bash +git push origin +gh run list --repo rodribm10/iachat --branch --limit 1 +# aguarda status "completed success" (~10-15min multi-arch) + +# Pega número da tag gerada +gh run view --repo rodribm10/iachat --log \ + | grep "imagetools create" -A 3 | grep -oE "v[0-9]+" +# → retorna "v67" (por exemplo) +``` + +**Sempre usar a tag `vN` específica, NUNCA `:latest`** (outras branches sobrescrevem). + +### 3. Inspeção read-only da VPS + +```bash +ssh root@76.13.174.155 ' +docker stack ls +docker service inspect iachat_iachat_app --format "{{json .Spec}}" | python3 -m json.tool +docker service inspect iachat_iachat_app --format "{{range .Spec.TaskTemplate.ContainerSpec.Env}}{{println .}}{{end}}" +' +``` + +### 4. Gerar secrets únicos NA VPS + +```bash +ssh root@76.13.174.155 " +mkdir -p /root/ +cat > /root//.secrets </.secrets +" +``` + +### 5. Criar `stack.yml` + `app.env` + `postgres_password.txt` + +Templates em: +- `~/.claude/skills/chatwoot-staging-deploy/stack.yml.template` +- `~/.claude/skills/chatwoot-staging-deploy/app.env.template` + +**Pontos críticos**: +- Traefik `rule=Host('')` com `priority=100` pra vencer catchall regex do prod +- Network pública `network_swarm_public` (external) +- Volumes isolados (`postgres_data`, `redis`, `storage`) +- Postgres password via Docker Secret (arquivo `/root//postgres_password.txt`) +- `POSTGRES_DATABASE=iachat_staging` (não `production`) + +### 6. Deploy + +```bash +ssh root@76.13.174.155 " +docker pull ghcr.io/rodribm10/iachat:v +docker stack deploy -c /root//stack.yml --with-registry-auth +sleep 10 +docker service ls --filter name= +" +``` + +Se `_app` reinicia em loop → é DB vazio. Próximo passo. + +### 7. 
Schema + migrations (container one-off) + +```bash +ssh root@76.13.174.155 " +docker run --rm --network __internal \ + --env-file /root//app.env \ + -e DISABLE_DATABASE_ENVIRONMENT_CHECK=1 \ + ghcr.io/rodribm10/iachat:v \ + sh -c 'bundle exec rails db:schema:load db:migrate db:seed' +" +``` + +`DISABLE_DATABASE_ENVIRONMENT_CHECK=1` é necessário — Rails bloqueia operações destrutivas em prod, mas aqui DB é zero. + +### 8. Restart do app + +```bash +ssh root@76.13.174.155 "docker service update --force _app" +``` + +Aguarde ~20s. `docker service ps _app` deve mostrar `Running` sem crash. + +### 9. Teste HTTPS + +```bash +curl -sSI https:/// +# Esperado: 302 redirect to /installation/onboarding +``` + +302 → abre no browser e cria admin via onboarding. + +## Troubleshooting + +| Sintoma | Fix | +|---|---| +| `installation_configs does not exist` | Falta Fase 7 (schema:load) | +| `ActiveRecord::ProtectedEnvironmentError` | Adicionar `DISABLE_DATABASE_ENVIRONMENT_CHECK=1` | +| Cert Let's Encrypt inválido | Labels Traefik erradas; conferir `priority=100` e `rule=Host()` não regex | +| 302 pra página do prod | Catchall ganhou; aumentar `priority` ou verificar `rule` específica | +| `image not found` no pull | Tag errada; `gh run list` pra confirmar | + +## Segurança + +1. `docs/acessos_vps.md` — gitignored. NUNCA commite. +2. A senha antiga das VPSs (valor omitido de propósito — nunca registrar senhas em docs versionados) foi compartilhada no histórico — **trocar nas 4 VPSs** (Leo, Rodrigo, Financeiro, Oracle). +3. Secrets por stack — gerar novos, nunca reutilizar entre envs. +4. Tag `:latest` é sobrescrita por qualquer push — sempre usar `vN`. 
+ +## Estado atual de exemplo + +Primeira execução desse runbook: **2026-04-21** +- Branch: `feat/captain-semantic-memory` +- Stack: `iachat-v2` +- DNS: `iachatv2.hoteis1001noites.com.br` +- Imagem: `ghcr.io/rodribm10/iachat:v67` +- Status: ✅ deploy bem-sucedido, aguardando onboarding do admin diff --git a/enterprise/app/models/captain/codex_credential.rb b/enterprise/app/models/captain/codex_credential.rb index 6abe936f0..48841712f 100644 --- a/enterprise/app/models/captain/codex_credential.rb +++ b/enterprise/app/models/captain/codex_credential.rb @@ -1,3 +1,24 @@ +# == Schema Information +# +# Table name: captain_codex_credentials +# +# id :bigint not null, primary key +# access_token :text not null +# chatgpt_plan_type :string +# email :string +# expires_at :datetime not null +# last_refresh_at :datetime +# refresh_token :text not null +# status :string default("active"), not null +# created_at :datetime not null +# updated_at :datetime not null +# chatgpt_account_id :string +# +# Indexes +# +# index_captain_codex_credentials_on_expires_at (expires_at) +# index_captain_codex_credentials_on_status (status) +# class Captain::CodexCredential < ApplicationRecord self.table_name = 'captain_codex_credentials' diff --git a/enterprise/app/models/captain/scenario.rb b/enterprise/app/models/captain/scenario.rb index 8fa2375bd..33c8ef3d7 100644 --- a/enterprise/app/models/captain/scenario.rb +++ b/enterprise/app/models/captain/scenario.rb @@ -72,6 +72,10 @@ class Captain::Scenario < ApplicationRecord # from a stronger model. Falls back to the global CAPTAIN_OPEN_AI_MODEL # (used by the orchestrator) when SCENARIO-specific override is unset. def agent_model + # Em modo Codex OAuth, ignora CAPTAIN_OPEN_AI_MODEL_SCENARIO (pode ter modelo + # legado como gpt-4o que o Codex rejeita) e usa o modelo padrão do provider. + return Captain::Llm::ProviderConfig.model if Captain::Llm::ProviderConfig.codex_oauth? 
+ scenario_model = InstallationConfig.find_by(name: 'CAPTAIN_OPEN_AI_MODEL_SCENARIO')&.value.presence scenario_model || super end diff --git a/enterprise/app/services/captain/codex/client.rb b/enterprise/app/services/captain/codex/client.rb index e8b378fb6..d0b14f5b8 100644 --- a/enterprise/app/services/captain/codex/client.rb +++ b/enterprise/app/services/captain/codex/client.rb @@ -11,6 +11,8 @@ require 'net/http' # os eventos SSE em um response final no mesmo formato do /responses síncrono. class Captain::Codex::Client API_BASE = 'https://chatgpt.com/backend-api/codex'.freeze + MAX_RETRIES = 2 + RETRY_DELAYS = [0.5, 1.5].freeze # segundos, backoff crescente class Error < StandardError attr_reader :http_status @@ -22,18 +24,48 @@ class Captain::Codex::Client end def responses(body) + attempt = 0 + begin + attempt += 1 + call_responses(body) + rescue Error => e + if retryable?(e) && attempt <= MAX_RETRIES + sleep_time = RETRY_DELAYS[attempt - 1] || RETRY_DELAYS.last + Rails.logger.warn("[Captain::Codex::Client] Retry #{attempt}/#{MAX_RETRIES} after #{sleep_time}s: #{e.message[0, 200]}") + sleep sleep_time + retry + end + raise + end + end + + private + + def call_responses(body) access_token = Captain::Codex::AuthService.valid_access_token state = { items: [], usage: nil, id: nil, model: nil, completed: false, error: nil } stream_post(access_token, body) { |event, data| handle_event(event, data, state) } - raise Error, "Stream failed: #{state[:error].inspect[0, 500]}" if state[:error] + raise transient_error("Stream failed: #{state[:error].inspect[0, 500]}") if state[:error] raise Error, 'Stream finished without response.completed' unless state[:completed] { 'id' => state[:id], 'model' => state[:model], 'output' => state[:items], 'usage' => state[:usage] } end - private + def transient_error(message) + Error.new(message, http_status: 503) + end + + # Retry apenas em erros transitórios: server_error upstream ou HTTP 5xx. 
+ # Não retenta HTTP 401/403/400; toda falha de stream vira 503 (transient_error) e é retentada. + def retryable?(error) + return true if error.http_status && error.http_status >= 500 + return true if error.message.include?('server_error') + return true if error.message.include?('Stream finished without response.completed') + + false + end def handle_event(event, data, state) case event diff --git a/enterprise/app/services/captain/contact_memories/contradiction_checker_service.rb b/enterprise/app/services/captain/contact_memories/contradiction_checker_service.rb index 0c4243ba6..f9382d910 100644 --- a/enterprise/app/services/captain/contact_memories/contradiction_checker_service.rb +++ b/enterprise/app/services/captain/contact_memories/contradiction_checker_service.rb @@ -6,7 +6,10 @@ class Captain::ContactMemories::ContradictionCheckerService # Above CONFLICT_THRESHOLD: ignore — unrelated facts. DEDUP_THRESHOLD = 0.15 CONFLICT_THRESHOLD = 0.6 - CHECK_MODEL = 'gpt-4o-mini'.freeze + + def self.check_model + Captain::Llm::ProviderConfig.light_model + end def initialize(memory:) @memory = memory @@ -51,7 +54,7 @@ class Captain::ContactMemories::ContradictionCheckerService end def query_llm_for_contradiction(fact_a, fact_b) - response = RubyLLM.chat(model: CHECK_MODEL).with_temperature(0).ask(contradiction_prompt(fact_a, fact_b)).content.to_s + response = RubyLLM.chat(model: self.class.check_model).with_temperature(0).ask(contradiction_prompt(fact_a, fact_b)).content.to_s # Extract the first meaningful word. Expected "sim" or "nao" (or "não"). 
first_word = response.strip.downcase.gsub(/[^a-zãáéíóúç]/, ' ').split.first.to_s # Normalize "não" → "nao" for ASCII comparison diff --git a/enterprise/app/services/captain/contact_memories/extraction_service.rb b/enterprise/app/services/captain/contact_memories/extraction_service.rb index 489914f76..86278f2c7 100644 --- a/enterprise/app/services/captain/contact_memories/extraction_service.rb +++ b/enterprise/app/services/captain/contact_memories/extraction_service.rb @@ -2,10 +2,13 @@ class Captain::ContactMemories::ExtractionService MAX_FACTS = 5 MIN_CONFIDENCE = 0.5 - EXTRACTION_MODEL = 'gpt-4o-mini'.freeze MAX_CHARS = 40_000 # matches Captain::Llm::ConversationInsightService convention SCOPE_PATTERN = /\A(global|unit:\d+)\z/ + def self.extraction_model + Captain::Llm::ProviderConfig.light_model + end + def initialize(conversation:) @conversation = conversation end @@ -28,7 +31,7 @@ class Captain::ContactMemories::ExtractionService # TODO(phase-6): add Integrations::LlmInstrumentation wrap for OTEL metrics # (extraction_count, extraction_cost, facts_per_call, llm_error_rate). def call_llm - RubyLLM.chat(model: EXTRACTION_MODEL) + RubyLLM.chat(model: self.class.extraction_model) .with_temperature(0) .with_params(response_format: { type: 'json_object' }) .ask(build_prompt) diff --git a/enterprise/app/services/captain/llm/embedding_service.rb b/enterprise/app/services/captain/llm/embedding_service.rb index 2fac54594..7cbb3c03c 100644 --- a/enterprise/app/services/captain/llm/embedding_service.rb +++ b/enterprise/app/services/captain/llm/embedding_service.rb @@ -17,7 +17,7 @@ class Captain::Llm::EmbeddingService return [] if content.blank? 
+ instrument_embedding_call(instrumentation_params(content, model)) do - RubyLLM.embed(content, model: model).vectors + embed_with_legacy_openai(content, model) end rescue RubyLLM::Error => e Rails.logger.error "Embedding API Error: #{e.message}" @@ -26,6 +26,17 @@ class Captain::Llm::EmbeddingService private + # Embeddings sempre vão direto pra OpenAI tradicional — o endpoint Codex + # via ChatGPT OAuth não expõe /embeddings. + def embed_with_legacy_openai(content, model) + legacy = Captain::Llm::ProviderConfig.legacy_openai_settings + api_base = legacy[:api_base].present? ? "#{legacy[:api_base]}/v1" : nil + + Llm::Config.with_api_key(legacy[:api_key], api_base: api_base) do |ctx| + ctx.embed(content, model: model).vectors + end + end + def instrumentation_params(content, model) { span_name: 'llm.captain.embedding', diff --git a/enterprise/app/services/captain/llm/provider_config.rb b/enterprise/app/services/captain/llm/provider_config.rb index 5cf1fdbda..ba84be7c3 100644 --- a/enterprise/app/services/captain/llm/provider_config.rb +++ b/enterprise/app/services/captain/llm/provider_config.rb @@ -18,6 +18,19 @@ class Captain::Llm::ProviderConfig DEFAULT_CODEX_PROXY_URL = 'http://localhost:3000/codex'.freeze DUMMY_API_KEY = 'codex-oauth'.freeze + # Modelo padrão pro Codex. gpt-5.2 é o mais recente reconhecido pelo RubyLLM + # (gpt-5.4 ainda não está no catalog do gem). Ambos são suportados pelo + # endpoint Codex da OpenAI via ChatGPT Plus. + DEFAULT_CODEX_MODEL = 'gpt-5.2'.freeze + + # Modelo leve pra tasks de background (extração de memória, verificação de + # contradição, traduções internas). Quando usamos Codex, usamos sempre o + # DEFAULT_CODEX_MODEL — o endpoint não expõe gpt-4o-mini. 
+ LIGHT_MODEL_DEFAULTS = { + 'openai_api' => 'gpt-4o-mini', + 'openai_codex_oauth' => DEFAULT_CODEX_MODEL + }.freeze + class << self def provider cfg('CAPTAIN_LLM_PROVIDER').presence || 'openai_api' @@ -49,13 +62,29 @@ class Captain::Llm::ProviderConfig settings[:model] end + # Modelo pra tasks leves (memory extraction, contradiction check, etc). + # Respeita a flag de provider: em Codex OAuth, usa DEFAULT_CODEX_MODEL (ignora CAPTAIN_OPEN_AI_MODEL). + def light_model + LIGHT_MODEL_DEFAULTS[provider] || LIGHT_MODEL_DEFAULTS['openai_api'] + end + + # Settings sempre da OpenAI tradicional, independente do provider. + # Usado por recursos que o endpoint Codex NÃO expõe: /embeddings e Files API. + # Não valida nem lança erro aqui: :api_key vem nil se CAPTAIN_OPEN_AI_API_KEY não existir. + def legacy_openai_settings + { + api_key: cfg('CAPTAIN_OPEN_AI_API_KEY'), + api_base: (cfg('CAPTAIN_OPEN_AI_ENDPOINT').presence || DEFAULT_OPENAI_ENDPOINT).chomp('/') + } + end + private def codex_settings { api_key: DUMMY_API_KEY, api_base: (cfg('CAPTAIN_CODEX_PROXY_URL').presence || DEFAULT_CODEX_PROXY_URL).chomp('/'), - model: cfg('CAPTAIN_OPEN_AI_MODEL').presence || default_codex_model + model: cfg('CAPTAIN_OPEN_AI_MODEL').presence || DEFAULT_CODEX_MODEL } end @@ -67,10 +96,6 @@ class Captain::Llm::ProviderConfig } end - def default_codex_model - 'gpt-5.4' - end - def cfg(name) InstallationConfig.find_by(name: name)&.value end diff --git a/enterprise/app/services/captain/llm/translate_query_service.rb b/enterprise/app/services/captain/llm/translate_query_service.rb index 404a44755..57d9a3061 100644 --- a/enterprise/app/services/captain/llm/translate_query_service.rb +++ b/enterprise/app/services/captain/llm/translate_query_service.rb @@ -1,5 +1,7 @@ class Captain::Llm::TranslateQueryService < Captain::BaseTaskService - MODEL = 'gpt-4.1-nano'.freeze + def self.model + Captain::Llm::ProviderConfig.light_model + end pattr_initialize [:account!] 
@@ -11,7 +13,7 @@ class Captain::Llm::TranslateQueryService < Captain::BaseTaskService { role: 'user', content: query } ] - response = make_api_call(model: MODEL, messages: messages) + response = make_api_call(model: self.class.model, messages: messages) return query if response[:error] response[:message].strip diff --git a/spec/enterprise/services/captain/llm/provider_config_spec.rb b/spec/enterprise/services/captain/llm/provider_config_spec.rb index 7235a21dc..5e9ee518e 100644 --- a/spec/enterprise/services/captain/llm/provider_config_spec.rb +++ b/spec/enterprise/services/captain/llm/provider_config_spec.rb @@ -37,8 +37,8 @@ RSpec.describe Captain::Llm::ProviderConfig do expect(settings[:api_base]).to eq('http://localhost:3000/codex') end - it 'falls back to default gpt-5.4 model when no custom model is set' do - expect(described_class.settings[:model]).to eq('gpt-5.4') + it 'falls back to DEFAULT_CODEX_MODEL when no custom model is set' do + expect(described_class.settings[:model]).to eq(described_class::DEFAULT_CODEX_MODEL) end it 'honors CAPTAIN_OPEN_AI_MODEL override even with Codex OAuth' do