feat: add audio transcoding support for WhatsApp Cloud API (#220)
* feat: add audio transcoding support for WhatsApp Cloud API - Introduced `Audio::TranscodeService` to handle audio transcoding to OGG/Opus format. - Updated `Messages::MessageBuilder` to transcode audio attachments based on `transcode_audio` parameter. - Enhanced `WhatsappCloudService` to normalize audio content types and send voice flag for recorded audio in OGG format. - Added utility functions for audio conversion in JavaScript. - Updated Dockerfile to include FFmpeg for audio processing. - Added tests for audio transcoding and WhatsApp Cloud service interactions. * feat: enhance audio handling with transcoding support and error management * feat: improve audio transcoding error handling and enhance audio recording features * feat: enhance audio transcoding process and error handling for better reliability * feat: update recorded audio handling to support boolean and array formats
This commit is contained in:
parent
2d4e851de7
commit
ce39e54308
1
Gemfile
1
Gemfile
@ -54,6 +54,7 @@ gem 'aws-sdk-s3', require: false
|
||||
gem 'azure-storage-blob', git: 'https://github.com/chatwoot/azure-storage-ruby', branch: 'chatwoot', require: false
|
||||
gem 'google-cloud-storage', '>= 1.48.0', require: false
|
||||
gem 'image_processing'
|
||||
gem 'streamio-ffmpeg', '~> 3.0'
|
||||
|
||||
##-- for actionmailbox --##
|
||||
gem 'aws-actionmailbox-ses', '~> 0'
|
||||
|
||||
@ -937,6 +937,8 @@ GEM
|
||||
squasher (0.7.2)
|
||||
stackprof (0.2.25)
|
||||
statsd-ruby (1.5.0)
|
||||
streamio-ffmpeg (3.0.2)
|
||||
multi_json (~> 1.8)
|
||||
stripe (18.0.1)
|
||||
telephone_number (1.4.20)
|
||||
test-prof (1.2.1)
|
||||
@ -1151,6 +1153,7 @@ DEPENDENCIES
|
||||
spring-watcher-listen
|
||||
squasher
|
||||
stackprof
|
||||
streamio-ffmpeg (~> 3.0)
|
||||
stripe (~> 18.0)
|
||||
telephone_number
|
||||
test-prof
|
||||
|
||||
@ -14,6 +14,7 @@ class Messages::MessageBuilder # rubocop:disable Metrics/ClassLength
|
||||
@message_type = params[:message_type] || 'outgoing'
|
||||
@attachments = params[:attachments]
|
||||
@is_recorded_audio = params[:is_recorded_audio]
|
||||
@transcode_audio = params[:transcode_audio]
|
||||
@attachments_metadata = normalize_attachments_metadata(params[:attachments_metadata])
|
||||
@automation_rule = content_attributes&.dig(:automation_rule_id)
|
||||
return unless params.instance_of?(ActionController::Parameters)
|
||||
@ -67,6 +68,7 @@ class Messages::MessageBuilder # rubocop:disable Metrics/ClassLength
|
||||
else
|
||||
file_type(uploaded_attachment&.content_type)
|
||||
end
|
||||
transcode_attachment(attachment, file_like_source(uploaded_attachment)) if should_transcode?(attachment)
|
||||
end
|
||||
end
|
||||
|
||||
@ -78,9 +80,9 @@ class Messages::MessageBuilder # rubocop:disable Metrics/ClassLength
|
||||
end
|
||||
|
||||
def recorded_audio_metadata(attachment) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
||||
# NOTE: `is_recorded_audio` can be either a boolean or an array of file names.
|
||||
# NOTE: `is_recorded_audio` can be either a boolean, the string "true", or an array of file names.
|
||||
return unless @is_recorded_audio
|
||||
return { is_recorded_audio: true } if @is_recorded_audio == true
|
||||
return { is_recorded_audio: true } if @is_recorded_audio == true || @is_recorded_audio == 'true'
|
||||
|
||||
return { is_recorded_audio: true } if @is_recorded_audio.is_a?(Array) && attachment.original_filename.in?(@is_recorded_audio)
|
||||
|
||||
@ -110,6 +112,27 @@ class Messages::MessageBuilder # rubocop:disable Metrics/ClassLength
|
||||
metadata.deep_stringify_keys
|
||||
end
|
||||
|
||||
# Tells whether an attachment should be handed to the audio transcoder.
#
# Transcoding is only considered when a `transcode_audio` value was supplied
# to the builder, and only for attachments whose `file_type` is 'audio'.
#
# @param attachment [#file_type] the attachment being built
# @return [Boolean]
def should_transcode?(attachment)
  return false unless @transcode_audio.present?

  attachment.file_type == 'audio'
end
|
||||
|
||||
# Narrows the raw upload down to something that can be read as a file.
#
# An object counts as file-like when it responds to `path` or `tempfile`
# (ActionDispatch::Http::UploadedFile, Tempfile, etc.). Anything else, such as
# a direct-upload signed-ID String, is not usable as a source file for
# transcoding, so nil is returned; TranscodeService falls back to downloading
# from the blob in that case.
#
# @param uploaded_attachment [Object, nil] the value received in `attachments`
# @return [Object, nil] the same object when file-like, otherwise nil
def file_like_source(uploaded_attachment)
  file_like = %i[path tempfile].any? { |reader| uploaded_attachment.respond_to?(reader) }
  file_like ? uploaded_attachment : nil
end
|
||||
|
||||
# Runs the attachment through Audio::TranscodeService and records the outcome
# in `attachment.meta`.
#
# On success the attachment is flagged with 'is_recorded_audio'. When the
# service raises UnsupportedFormatError or TranscodingError the failure is
# logged, the attachment is flagged with 'audio_transcoding_failed' and the
# error is swallowed so message creation can continue. Any other exception
# propagates to the caller.
#
# @param attachment [#meta, #meta=] attachment being built
# @param uploaded_file [Object, nil] optional file-like source passed to the
#   service as `source_file:`
def transcode_attachment(attachment, uploaded_file = nil)
  outcome_key =
    begin
      Audio::TranscodeService.new(attachment, @transcode_audio, source_file: uploaded_file).perform
      'is_recorded_audio'
    rescue CustomExceptions::Audio::UnsupportedFormatError, CustomExceptions::Audio::TranscodingError => e
      Rails.logger.error("Audio transcoding failed, keeping original attachment: #{e.message}")
      'audio_transcoding_failed'
    end

  attachment.meta ||= {}
  attachment.meta[outcome_key] = true
end
|
||||
|
||||
def process_emails
|
||||
return unless @conversation.inbox&.inbox_type == 'Email'
|
||||
|
||||
|
||||
@ -23,9 +23,13 @@ export const buildCreatePayload = ({
|
||||
files.forEach(file => {
|
||||
payload.append('attachments[]', file);
|
||||
});
|
||||
isRecordedAudio?.forEach(filename => {
|
||||
payload.append('is_recorded_audio[]', filename);
|
||||
});
|
||||
if (isRecordedAudio === true) {
|
||||
payload.append('is_recorded_audio', true);
|
||||
} else if (Array.isArray(isRecordedAudio)) {
|
||||
isRecordedAudio.forEach(filename => {
|
||||
payload.append('is_recorded_audio[]', filename);
|
||||
});
|
||||
}
|
||||
payload.append('private', isPrivate);
|
||||
payload.append('echo_id', echoId);
|
||||
payload.append('cc_emails', ccEmails);
|
||||
|
||||
@ -4,7 +4,7 @@ import { ref, onMounted, onUnmounted, defineEmits, defineExpose } from 'vue';
|
||||
import WaveSurfer from 'wavesurfer.js';
|
||||
import RecordPlugin from 'wavesurfer.js/dist/plugins/record.js';
|
||||
import { format, intervalToDuration } from 'date-fns';
|
||||
import { convertAudio } from './utils/mp3ConversionUtils';
|
||||
import { convertAudio } from './utils/audioConversionUtils';
|
||||
|
||||
const props = defineProps({
|
||||
audioRecordFormat: {
|
||||
@ -18,6 +18,7 @@ const emit = defineEmits([
|
||||
'finishRecord',
|
||||
'pause',
|
||||
'play',
|
||||
'recordError',
|
||||
]);
|
||||
|
||||
const waveformContainer = ref(null);
|
||||
@ -26,6 +27,7 @@ const record = ref(null);
|
||||
const isRecording = ref(false);
|
||||
const isPlaying = ref(false);
|
||||
const hasRecording = ref(false);
|
||||
const recordedAudioUrl = ref(null);
|
||||
|
||||
const formatTimeProgress = time => {
|
||||
const duration = intervalToDuration({ start: 0, end: time });
|
||||
@ -35,6 +37,28 @@ const formatTimeProgress = time => {
|
||||
);
|
||||
};
|
||||
|
||||
const AUDIO_EXTENSION_MAP = {
|
||||
'audio/ogg': 'ogg',
|
||||
'audio/mp3': 'mp3',
|
||||
'audio/mpeg': 'mp3',
|
||||
'audio/wav': 'wav',
|
||||
'audio/webm': 'webm',
|
||||
};
|
||||
|
||||
/**
 * Build the options object passed to `RecordPlugin.create()`.
 *
 * The base options are always returned. `mimeType` is added only when the
 * requested format is `audio/ogg` and the runtime reports support for
 * `audio/ogg;codecs=opus`.
 *
 * @param {string} audioFormat requested output MIME type (e.g. 'audio/ogg')
 * @returns {{scrollingWaveform: boolean, renderRecordedAudio: boolean, mimeType?: string}}
 */
const getRecordPluginOptions = audioFormat => {
  const options = {
    scrollingWaveform: true,
    renderRecordedAudio: false,
  };
  // `MediaRecorder` is a runtime global; referencing it unguarded throws a
  // ReferenceError where it does not exist, which would abort the caller
  // instead of simply falling back to the recorder's default mime type.
  if (
    audioFormat === 'audio/ogg' &&
    typeof MediaRecorder !== 'undefined' &&
    typeof MediaRecorder.isTypeSupported === 'function' &&
    MediaRecorder.isTypeSupported('audio/ogg;codecs=opus')
  ) {
    options.mimeType = 'audio/ogg;codecs=opus';
  }
  return options;
};
|
||||
|
||||
const initWaveSurfer = () => {
|
||||
wavesurfer.value = WaveSurfer.create({
|
||||
container: waveformContainer.value,
|
||||
@ -45,10 +69,7 @@ const initWaveSurfer = () => {
|
||||
barGap: 1,
|
||||
barRadius: 2,
|
||||
plugins: [
|
||||
RecordPlugin.create({
|
||||
scrollingWaveform: true,
|
||||
renderRecordedAudio: false,
|
||||
}),
|
||||
RecordPlugin.create(getRecordPluginOptions(props.audioRecordFormat)),
|
||||
],
|
||||
});
|
||||
|
||||
@ -62,21 +83,29 @@ const initWaveSurfer = () => {
|
||||
});
|
||||
|
||||
record.value.on('record-end', async blob => {
|
||||
const audioUrl = URL.createObjectURL(blob);
|
||||
const audioBlob = await convertAudio(blob, props.audioRecordFormat);
|
||||
const fileName = `${getUuid()}.mp3`;
|
||||
const file = new File([audioBlob], fileName, {
|
||||
type: props.audioRecordFormat,
|
||||
});
|
||||
wavesurfer.value.load(audioUrl);
|
||||
emit('finishRecord', {
|
||||
name: file.name,
|
||||
type: file.type,
|
||||
size: file.size,
|
||||
file,
|
||||
});
|
||||
hasRecording.value = true;
|
||||
isRecording.value = false;
|
||||
try {
|
||||
const audioBlob = await convertAudio(blob, props.audioRecordFormat);
|
||||
const ext = AUDIO_EXTENSION_MAP[props.audioRecordFormat] || 'mp3';
|
||||
const fileName = `${getUuid()}.${ext}`;
|
||||
const file = new File([audioBlob], fileName, {
|
||||
type: props.audioRecordFormat,
|
||||
});
|
||||
if (recordedAudioUrl.value) URL.revokeObjectURL(recordedAudioUrl.value);
|
||||
recordedAudioUrl.value = URL.createObjectURL(audioBlob);
|
||||
wavesurfer.value.load(recordedAudioUrl.value);
|
||||
emit('finishRecord', {
|
||||
name: file.name,
|
||||
type: file.type,
|
||||
size: file.size,
|
||||
file,
|
||||
});
|
||||
hasRecording.value = true;
|
||||
isRecording.value = false;
|
||||
} catch (error) {
|
||||
isRecording.value = false;
|
||||
hasRecording.value = false;
|
||||
emit('recordError', { error });
|
||||
}
|
||||
});
|
||||
|
||||
record.value.on('record-progress', time => {
|
||||
@ -109,6 +138,10 @@ onMounted(() => {
|
||||
});
|
||||
|
||||
onUnmounted(() => {
|
||||
if (recordedAudioUrl.value) {
|
||||
URL.revokeObjectURL(recordedAudioUrl.value);
|
||||
recordedAudioUrl.value = null;
|
||||
}
|
||||
if (wavesurfer.value) {
|
||||
wavesurfer.value.destroy();
|
||||
}
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
import lamejs from '@breezystack/lamejs';
|
||||
|
||||
import { remuxWebmToOgg } from './webmOpusToOgg';
|
||||
|
||||
const writeString = (view, offset, string) => {
|
||||
// eslint-disable-next-line no-plusplus
|
||||
for (let i = 0; i < string.length; i++) {
|
||||
@ -135,7 +137,10 @@ export const convertToMp3 = async (audioBlob, bitrate = 128) => {
|
||||
|
||||
export const convertAudio = async (inputBlob, outputFormat, bitrate = 128) => {
|
||||
let audio;
|
||||
if (outputFormat === 'audio/wav') {
|
||||
if (outputFormat === 'audio/ogg') {
|
||||
// Chrome produces WebM even when OGG is requested; remux to proper OGG/Opus
|
||||
audio = await remuxWebmToOgg(inputBlob);
|
||||
} else if (outputFormat === 'audio/wav') {
|
||||
audio = await convertToWav(inputBlob);
|
||||
} else if (outputFormat === 'audio/mp3') {
|
||||
audio = await convertToMp3(inputBlob, bitrate);
|
||||
@ -0,0 +1,354 @@
|
||||
/* eslint-disable no-bitwise */
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
import { remuxWebmToOgg } from '../webmOpusToOgg';
|
||||
|
||||
/**
 * Helper: wrap a Uint8Array in an `audio/webm` Blob.
 * When the Blob implementation has no `.arrayBuffer()` (jsdom may lack it),
 * a FileReader-based replacement is attached to the returned instance.
 */
function blobFrom(bytes) {
  const webmBlob = new Blob([bytes], { type: 'audio/webm' });
  if (webmBlob.arrayBuffer) return webmBlob;

  webmBlob.arrayBuffer = () =>
    new Promise((resolve, reject) => {
      const fileReader = new FileReader();
      fileReader.onerror = () => reject(fileReader.error);
      fileReader.onload = () => resolve(fileReader.result);
      fileReader.readAsArrayBuffer(webmBlob);
    });
  return webmBlob;
}
|
||||
|
||||
/**
 * Resolve with the Blob's contents as an ArrayBuffer.
 * Uses `blob.arrayBuffer()` when present and falls back to a FileReader
 * otherwise (jsdom's Blob may not implement `arrayBuffer`).
 */
async function readBlobAsArrayBuffer(blob) {
  if (!blob.arrayBuffer) {
    return new Promise((resolve, reject) => {
      const fileReader = new FileReader();
      fileReader.onerror = () => reject(fileReader.error);
      fileReader.onload = () => resolve(fileReader.result);
      fileReader.readAsArrayBuffer(blob);
    });
  }
  return blob.arrayBuffer();
}
|
||||
|
||||
// --- EBML element helpers (shared across tests) ---
|
||||
|
||||
/**
 * Encode `value` as a 1- or 2-byte EBML data-size VINT (test helper).
 *
 * @param {number} value non-negative integer in the range 0..0x3ffe
 * @returns {number[]} encoded bytes
 * @throws {RangeError} when the value cannot be represented in two bytes;
 *   previously such values were silently truncated, and 0x3fff came out as
 *   the all-ones pattern that EBML reserves for "unknown size"
 */
function writeVint(value) {
  if (!Number.isInteger(value) || value < 0 || value >= 0x3fff) {
    throw new RangeError(
      `writeVint: cannot encode ${value} as a 1- or 2-byte VINT`
    );
  }
  // 1-byte VINT for values 0-126 (0x7f data bits would be the reserved all-ones pattern)
  if (value < 0x7f) return [0x80 | value];
  // 2-byte VINT for values 127-0x3ffe
  return [0x40 | ((value >> 8) & 0x3f), value & 0xff];
}
|
||||
|
||||
/**
 * Serialize an EBML element ID (marker bits already included) as big-endian
 * bytes, using the fewest bytes (1-4) that hold the value.
 */
function writeId(id) {
  if (id <= 0xff) return [id];

  let width = 4;
  if (id <= 0xffff) width = 2;
  else if (id <= 0xffffff) width = 3;

  const bytes = [];
  for (let shift = (width - 1) * 8; shift >= 0; shift -= 8) {
    bytes.push((id >> shift) & 0xff);
  }
  return bytes;
}
|
||||
|
||||
/**
 * Build one EBML element with a known size: ID bytes, then the payload length
 * as a VINT, then the payload bytes themselves.
 */
function element(id, payload) {
  const idBytes = writeId(id);
  const sizeBytes = writeVint(payload.length);
  return idBytes.concat(sizeBytes, Array.from(payload));
}
|
||||
|
||||
/**
 * Build a master element whose size field is the 8-byte "unknown size" VINT
 * (0x01 followed by seven 0xFF bytes), followed by the flattened children.
 */
function masterUnknown(id, children) {
  const unknownSize = [0x01].concat(new Array(7).fill(0xff));
  return writeId(id).concat(unknownSize, children.flat());
}
|
||||
|
||||
/** Build a known-size master element whose payload is the flattened children. */
function master(id, children) {
  return element(id, children.flat());
}
|
||||
|
||||
/**
 * Assemble a tiny WebM-like byte stream holding a single SimpleBlock with a
 * synthetic Opus packet.
 *
 * Layout (simplified EBML):
 *   EBML Header (master, empty)
 *   Segment (master, unknown size)
 *     Tracks (master)
 *       TrackEntry (master)
 *         Audio (master)
 *           Channels (uint, 1)
 *           SamplingFrequency (float64, 48000.0)
 *     Cluster (master, unknown size)
 *       SimpleBlock (track=1, timecode=0, flags=0, opus packet)
 */
function buildMinimalWebM() {
  // SamplingFrequency payload: 48000.0 as a big-endian float64
  const freqView = new DataView(new ArrayBuffer(8));
  freqView.setFloat64(0, 48000.0);
  const freqPayload = Array.from(new Uint8Array(freqView.buffer));

  // Tracks (0x1654AE6B) > TrackEntry (0xAE) > Audio (0xE1)
  //   > Channels (0x9F) = 1, SamplingFrequency (0xB5) = 48000.0
  const tracks = master(0x1654ae6b, [
    master(0xae, [
      master(0xe1, [element(0x9f, [1]), element(0xb5, freqPayload)]),
    ]),
  ]);

  // SimpleBlock (0xA3): track number 1 as a VINT (0x81), relative timecode 0
  // (int16 BE), flags 0x00, then a 5-byte synthetic Opus packet whose TOC
  // byte is 0xFC (config=31 CELT FB 20ms, code=0 → 1 frame).
  const blockHeader = [0x81, 0x00, 0x00, 0x00];
  const opusPacket = [0xfc, 0x00, 0x01, 0x02, 0x03];
  const simpleBlock = element(0xa3, blockHeader.concat(opusPacket));

  // Cluster (0x1F43B675) and Segment (0x18538067) both use unknown size
  const cluster = masterUnknown(0x1f43b675, [simpleBlock]);
  const segment = masterUnknown(0x18538067, [tracks, cluster]);

  // EBML Header (0x1A45DFA3) — minimal, no children
  const ebmlHeader = master(0x1a45dfa3, []);

  return Uint8Array.from(ebmlHeader.concat(segment));
}
|
||||
|
||||
describe('remuxWebmToOgg', () => {
  // Fixed part of an OGG page header: capture pattern through segment count.
  const OGG_HEADER_BYTES = 27;

  /**
   * Offsets of every "OggS" capture pattern that has room for a full page
   * header after it. Shared by all structural assertions below so they agree
   * on what counts as a page.
   */
  const findOggPageOffsets = bytes => {
    const offsets = [];
    for (let i = 0; i + OGG_HEADER_BYTES <= bytes.length; i += 1) {
      if (
        bytes[i] === 0x4f &&
        bytes[i + 1] === 0x67 &&
        bytes[i + 2] === 0x67 &&
        bytes[i + 3] === 0x53
      ) {
        offsets.push(i);
      }
    }
    return offsets;
  };

  /** Remux the minimal WebM fixture and expose the output in several views. */
  const remuxMinimalWebM = async () => {
    const result = await remuxWebmToOgg(blobFrom(buildMinimalWebM()));
    const buffer = await readBlobAsArrayBuffer(result);
    return { result, buffer, bytes: new Uint8Array(buffer) };
  };

  /** Decode the first 8 payload bytes of the page starting at `pageStart`. */
  const pageMagic = (bytes, pageStart) => {
    const numSegments = bytes[pageStart + 26];
    const dataStart = pageStart + OGG_HEADER_BYTES + numSegments;
    return new TextDecoder().decode(bytes.slice(dataStart, dataStart + 8));
  };

  it('returns the original Blob when input starts with OggS', async () => {
    const oggBytes = new Uint8Array([0x4f, 0x67, 0x67, 0x53, 0x00, 0x01, 0x02]);
    const oggBlob = blobFrom(oggBytes);

    const result = await remuxWebmToOgg(oggBlob);

    // Should be the exact same Blob reference (passthrough)
    expect(result).toBe(oggBlob);
  });

  it('throws an error when parseWebM yields no frames', async () => {
    // Four zero bytes: no EBML data, no OggS magic → parseWebM finds no frames
    const emptyBlob = blobFrom(new Uint8Array([0x00, 0x00, 0x00, 0x00]));

    await expect(remuxWebmToOgg(emptyBlob)).rejects.toThrow(
      'No Opus frames found in WebM input'
    );
  });

  it('remuxes a minimal WebM input into valid OGG output', async () => {
    const { result, bytes } = await remuxMinimalWebM();

    expect(result).toBeInstanceOf(Blob);
    expect(result.type).toBe('audio/ogg');

    // Must start with OggS capture pattern
    expect(Array.from(bytes.slice(0, 4))).toEqual([0x4f, 0x67, 0x67, 0x53]);

    // At least 3 pages: OpusHead (BOS) + OpusTags + audio page(s)
    expect(findOggPageOffsets(bytes).length).toBeGreaterThanOrEqual(3);
  });

  it('has sequential page numbers in OGG output', async () => {
    const { buffer, bytes } = await remuxMinimalWebM();
    const dv = new DataView(buffer);

    // Page sequence number lives at offset 18 of each page header
    const pageSeqs = findOggPageOffsets(bytes).map(start =>
      dv.getUint32(start + 18, true)
    );

    // Guard against a vacuous pass when no pages were found at all
    expect(pageSeqs.length).toBeGreaterThan(0);
    // Pages should be 0, 1, 2, ...
    expect(pageSeqs).toEqual(pageSeqs.map((_, idx) => idx));
  });

  it('has the same serial number across all pages', async () => {
    const { buffer, bytes } = await remuxMinimalWebM();
    const dv = new DataView(buffer);

    // Stream serial number lives at offset 14 of each page header
    const serials = findOggPageOffsets(bytes).map(start =>
      dv.getUint32(start + 14, true)
    );

    // All pages share the same serial (an empty list would fail here too)
    expect([...new Set(serials)]).toHaveLength(1);
  });

  it('first page contains OpusHead', async () => {
    const { bytes } = await remuxMinimalWebM();

    // First page is BOS (header_type byte at offset 5 has bit 0x02 set)
    expect(bytes[5] & 0x02).toBe(0x02);

    // Payload of the first page starts with the OpusHead magic
    expect(pageMagic(bytes, 0)).toBe('OpusHead');
  });

  it('second page contains OpusTags', async () => {
    const { bytes } = await remuxMinimalWebM();

    const pageStarts = findOggPageOffsets(bytes);
    expect(pageStarts.length).toBeGreaterThanOrEqual(2);

    expect(pageMagic(bytes, pageStarts[1])).toBe('OpusTags');
  });

  it('last page has EOS flag set', async () => {
    const { bytes } = await remuxMinimalWebM();

    const pageStarts = findOggPageOffsets(bytes);
    const lastPageStart = pageStarts.length
      ? pageStarts[pageStarts.length - 1]
      : -1;

    expect(lastPageStart).toBeGreaterThan(0);
    // EOS flag = 0x04
    expect(bytes[lastPageStart + 5] & 0x04).toBe(0x04);
  });

  it('logs warning for laced SimpleBlock', async () => {
    const consoleSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});

    try {
      // Build a WebM with a laced SimpleBlock (flags byte with lacing bits set)
      const tracks = master(0x1654ae6b, [
        master(0xae, [master(0xe1, [element(0x9f, [1])])]),
      ]);

      // SimpleBlock with lacing bits set (flags = 0x06 → Xiph lacing)
      const opusPacket = [0xfc, 0x00, 0x01];
      const simpleBlockPayload = [0x81, 0x00, 0x00, 0x06, ...opusPacket];
      const simpleBlock = element(0xa3, simpleBlockPayload);
      const cluster = masterUnknown(0x1f43b675, [simpleBlock]);
      const segment = masterUnknown(0x18538067, [tracks, cluster]);
      const ebmlHeader = master(0x1a45dfa3, []);

      const webmBlob = blobFrom(new Uint8Array([...ebmlHeader, ...segment]));
      // Should still produce output (not crash), but warn
      const result = await remuxWebmToOgg(webmBlob);
      expect(result).toBeInstanceOf(Blob);

      expect(consoleSpy).toHaveBeenCalledWith(
        expect.stringContaining('laced SimpleBlock detected')
      );
    } finally {
      // Restore even when an assertion above throws, so the mocked
      // console.warn does not leak into other tests.
      consoleSpy.mockRestore();
    }
  });
});
|
||||
@ -0,0 +1,454 @@
|
||||
/* eslint-disable no-bitwise */
|
||||
/**
|
||||
* WebM/Opus → OGG/Opus remuxer
|
||||
*
|
||||
* Chrome's MediaRecorder produces WebM containers even when
|
||||
* `audio/ogg;codecs=opus` is requested. WhatsApp Cloud API requires
|
||||
* proper OGG/Opus files for voice messages.
|
||||
*
|
||||
* This module extracts raw Opus packets from the WebM (EBML) container
|
||||
* and repackages them into a valid OGG bitstream. The audio data itself
|
||||
* is never re-encoded — only the container format changes.
|
||||
*
|
||||
* References:
|
||||
* EBML (container for WebM): RFC 8794 — https://www.rfc-editor.org/rfc/rfc8794
|
||||
* Matroska/WebM elements: https://www.matroska.org/technical/elements.html
|
||||
* OGG bitstream framing: RFC 3533 — https://www.rfc-editor.org/rfc/rfc3533
|
||||
* Opus codec: RFC 6716 — https://www.rfc-editor.org/rfc/rfc6716
|
||||
* Opus in OGG (OpusHead/Tags): RFC 7845 — https://www.rfc-editor.org/rfc/rfc7845
|
||||
*/
|
||||
|
||||
// ======================== EBML / WebM parser ========================
|
||||
|
||||
const EBML_IDS = {
|
||||
Segment: 0x18538067,
|
||||
SegmentInfo: 0x1549a966,
|
||||
Tracks: 0x1654ae6b,
|
||||
TrackEntry: 0xae,
|
||||
CodecPrivate: 0x63a2,
|
||||
Audio: 0xe1,
|
||||
SamplingFrequency: 0xb5,
|
||||
Channels: 0x9f,
|
||||
Cluster: 0x1f43b675,
|
||||
Timecode: 0xe7,
|
||||
SimpleBlock: 0xa3,
|
||||
BlockGroup: 0xa0,
|
||||
Block: 0xa1,
|
||||
};
|
||||
|
||||
const MASTER_ELEMENTS = new Set([
|
||||
0x1a45dfa3, // EBML header
|
||||
EBML_IDS.Segment,
|
||||
EBML_IDS.SegmentInfo,
|
||||
EBML_IDS.Tracks,
|
||||
EBML_IDS.TrackEntry,
|
||||
EBML_IDS.Audio,
|
||||
EBML_IDS.Cluster,
|
||||
EBML_IDS.BlockGroup,
|
||||
]);
|
||||
|
||||
/**
 * Decode the EBML variable-length integer (data size) starting at `pos`.
 *
 * @returns {{value: number, length: number} | null} decoded value with the
 *   marker bit stripped and the number of bytes consumed, or null when the
 *   position is out of range, the lead byte is 0, or the bytes run past the
 *   end of `data`
 */
function readVint(data, pos) {
  if (pos >= data.length) return null;
  const lead = data[pos];
  if (lead === 0) return null;

  // Width is 1 plus the number of zero bits ahead of the marker bit in the
  // lead byte; a lead byte with no marker in its low 8 bits yields 9.
  const width = Math.clz32(lead & 0xff) - 23;
  if (width > 8 || pos + width > data.length) return null;

  const marker = 0x80 >> (width - 1);
  let value = lead & (marker - 1);
  let idx = 1;
  while (idx < width) {
    // Multiply rather than shift so values wider than 32 bits keep accumulating
    value = value * 256 + data[pos + idx];
    idx += 1;
  }
  return { value, length: width };
}
|
||||
|
||||
/**
 * Decode the EBML element ID starting at `pos`. Unlike data sizes, the
 * leading marker bits stay part of the returned ID.
 *
 * @returns {{id: number, length: number} | null} the ID and the number of
 *   bytes consumed (1-4), or null when the position is out of range, the lead
 *   byte is 0, the ID would be wider than 4 bytes, or it runs past the end
 */
function readElementId(data, pos) {
  if (pos >= data.length) return null;
  const lead = data[pos];
  if (lead === 0) return null;

  // Width is 1 plus the number of zero bits ahead of the marker bit
  const width = Math.clz32(lead & 0xff) - 23;
  if (width > 4 || pos + width > data.length) return null;

  let id = lead;
  let idx = 1;
  while (idx < width) {
    id = id * 256 + data[pos + idx];
    idx += 1;
  }
  return { id, length: width };
}
|
||||
|
||||
/**
 * Read `length` bytes starting at `offset` as a big-endian unsigned integer.
 * Uses multiplication instead of shifts, so it is not limited to 32 bits.
 */
function readUintBE(data, offset, length) {
  let total = 0;
  let i = 0;
  while (i < length) {
    total = total * 256 + data[offset + i];
    i += 1;
  }
  return total;
}
|
||||
|
||||
/**
 * Read a big-endian IEEE-754 float of 4 or 8 bytes starting at `offset`.
 * Any other `length` yields NaN.
 */
function readFloatBE(data, offset, length) {
  const isSingle = length === 4;
  if (!isSingle && length !== 8) return NaN;

  // Copy into a fresh buffer so the DataView starts at byte 0
  const raw = Uint8Array.from({ length }, (_, i) => data[offset + i]);
  const view = new DataView(raw.buffer);
  return isSingle ? view.getFloat32(0) : view.getFloat64(0);
}
|
||||
|
||||
/**
 * Extract the raw Opus frame from a SimpleBlock / Block element.
 *
 * The block payload is: track number (VINT), relative timecode (int16 BE),
 * flags (1 byte), then the frame data up to `end`.
 *
 * @param {Uint8Array} data   whole input buffer
 * @param {number} offset     start of the block payload
 * @param {number} end        end (exclusive) of the block payload
 * @returns {Uint8Array|null} copy of the frame bytes, or null when the block
 *   is too short to contain a frame
 */
function extractFrameFromBlock(data, offset, end) {
  const trackVint = readVint(data, offset);
  if (!trackVint) return null;

  // Skip the track number and the 2-byte relative timecode
  const flagsPos = offset + trackVint.length + 2;
  // Do not inspect a flags byte that lies outside this block: it would be a
  // byte of the next element (or undefined) and could trigger a bogus warning.
  if (flagsPos >= end) return null;

  // Lacing (Xiph/EBML/fixed-size) is NOT supported; this assumes single-frame
  // blocks as produced by MediaRecorder.
  const lacingBits = (data[flagsPos] >> 1) & 0x03;
  if (lacingBits !== 0) {
    // eslint-disable-next-line no-console
    console.warn(
      'webmOpusToOgg: laced SimpleBlock detected (unsupported), frame may be invalid'
    );
  }

  const frameStart = flagsPos + 1;
  if (frameStart >= end) return null;
  return data.slice(frameStart, end);
}
|
||||
|
||||
/**
 * Walk the EBML tree and collect metadata + Opus frames.
 * We only descend into master elements and only extract the fields we need.
 *
 * @param {ArrayBuffer} buffer raw WebM bytes
 * @returns {{channels: number, sampleRate: number, codecPrivate: (Uint8Array|null), frames: Uint8Array[]}}
 *   `channels` defaults to 1 and `sampleRate` to 48000; a malformed Channels
 *   or SamplingFrequency element leaves the default in place instead of
 *   replacing it with 0 or NaN.
 */
function parseWebM(buffer) {
  const data = new Uint8Array(buffer);
  const result = {
    channels: 1,
    sampleRate: 48000,
    codecPrivate: null,
    frames: [],
  };

  /** Handle one non-master element whose payload spans [pos, elEnd). */
  function readLeaf(id, pos, elEnd) {
    // Use the clamped span so a truncated file never reads past the buffer
    const payloadLen = elEnd - pos;
    switch (id) {
      case EBML_IDS.Channels: {
        const channels = readUintBE(data, pos, payloadLen);
        // The OpusHead channel count is a single byte and must be non-zero
        if (Number.isInteger(channels) && channels >= 1 && channels <= 255) {
          result.channels = channels;
        }
        break;
      }
      case EBML_IDS.SamplingFrequency: {
        const rate = readFloatBE(data, pos, payloadLen);
        // readFloatBE yields NaN for payloads that are not 4 or 8 bytes
        if (Number.isFinite(rate) && rate > 0) result.sampleRate = rate;
        break;
      }
      case EBML_IDS.CodecPrivate:
        result.codecPrivate = data.slice(pos, elEnd);
        break;
      case EBML_IDS.SimpleBlock:
      case EBML_IDS.Block: {
        const frame = extractFrameFromBlock(data, pos, elEnd);
        if (frame && frame.length > 0) result.frames.push(frame);
        break;
      }
      default:
        break;
    }
  }

  function walk(start, end) {
    let pos = start;
    while (pos < end) {
      const idRes = readElementId(data, pos);
      if (!idRes) break;
      pos += idRes.length;

      const sizeRes = readVint(data, pos);
      if (!sizeRes) break;
      pos += sizeRes.length;

      // Handle "unknown size" (all-ones VINT) by treating it as the rest of
      // the parent. Exponentiation is used instead of a bit-shift to avoid
      // 32-bit overflow for 5+ byte VINTs.
      const maxVint = 2 ** (7 * sizeRes.length) - 1;
      const elEnd =
        sizeRes.value === maxVint ? end : Math.min(pos + sizeRes.value, end);

      if (MASTER_ELEMENTS.has(idRes.id)) {
        walk(pos, elEnd);
      } else {
        readLeaf(idRes.id, pos, elEnd);
      }
      pos = elEnd;
    }
  }

  walk(0, data.length);
  return result;
}
|
||||
|
||||
// ======================== OGG writer ========================
|
||||
|
||||
/**
 * Lookup table for the OGG CRC-32 (polynomial 0x04C11DB7, processed
 * most-significant bit first). Entry `n` is the register after feeding byte
 * `n` through eight shift steps.
 */
const CRC_TABLE = Uint32Array.from({ length: 256 }, (_, index) => {
  let reg = (index << 24) >>> 0;
  for (let bit = 0; bit < 8; bit += 1) {
    const topBitSet = (reg & 0x80000000) !== 0;
    reg = (reg << 1) >>> 0;
    if (topBitSet) reg = (reg ^ 0x04c11db7) >>> 0;
  }
  return reg;
});
|
||||
|
||||
/**
 * Compute the OGG page checksum over `bytes` using CRC_TABLE, starting from
 * a zero register and consuming one byte per step, MSB first.
 *
 * @returns {number} unsigned 32-bit checksum
 */
function oggCrc32(bytes) {
  let reg = 0;
  // eslint-disable-next-line no-restricted-syntax
  for (const byte of bytes) {
    const tableIndex = ((reg >>> 24) ^ byte) & 0xff;
    reg = (CRC_TABLE[tableIndex] ^ (reg << 8)) >>> 0;
  }
  return reg;
}
|
||||
|
||||
/**
 * Build one OGG page.
 *
 * @param {number} headerType       0x02 = BOS, 0x04 = EOS, 0x00 = normal
 * @param {number} granulePosition  48 kHz sample count
 * @param {number} serialNumber     logical stream id
 * @param {number} pageSeq          page sequence counter
 * @param {Uint8Array[]} packets    one or more complete Opus packets
 * @returns {Uint8Array} the serialized page, checksum included
 * @throws {RangeError} when the packets need more than 255 lacing values —
 *   the segment count is a single header byte, so such a page cannot be
 *   written and used to come out silently corrupt
 */
function createOggPage(
  headerType,
  granulePosition,
  serialNumber,
  pageSeq,
  packets
) {
  // Build the lacing / segment table
  const segTable = [];
  let dataLen = 0;
  packets.forEach(pkt => {
    let rem = pkt.length;
    while (rem >= 255) {
      segTable.push(255);
      rem -= 255;
    }
    segTable.push(rem); // final segment (0 when pkt.length is a multiple of 255)
    dataLen += pkt.length;
  });

  if (segTable.length > 255) {
    throw new RangeError(
      `createOggPage: ${segTable.length} segments do not fit in one OGG page (max 255)`
    );
  }

  const hdrLen = 27 + segTable.length;
  const page = new Uint8Array(hdrLen + dataLen);
  const dv = new DataView(page.buffer);

  // Capture pattern
  page.set([0x4f, 0x67, 0x67, 0x53]); // "OggS"
  page[4] = 0; // version
  page[5] = headerType;

  // Granule position (int64 LE), written as two 32-bit halves
  dv.setUint32(6, granulePosition & 0xffffffff, true);
  dv.setUint32(
    10,
    Math.floor(granulePosition / 0x100000000) & 0xffffffff,
    true
  );

  dv.setUint32(14, serialNumber, true); // serial
  dv.setUint32(18, pageSeq, true); // page sequence
  dv.setUint32(22, 0, true); // CRC placeholder (must be zero while hashing)

  page[26] = segTable.length;
  page.set(segTable, 27);

  let off = hdrLen;
  packets.forEach(pkt => {
    page.set(pkt, off);
    off += pkt.length;
  });

  // Fill in the CRC, computed over the whole page
  dv.setUint32(22, oggCrc32(page), true);
  return page;
}
|
||||
|
||||
// ======================== Opus helpers ========================
|
||||
|
||||
/**
 * Lookup table: frame duration in ms for each Opus TOC config index (0-31).
 * Layout: SILK NB/MB/WB (0-11), Hybrid SWB/FB (12-15), CELT NB/WB/SWB/FB (16-31).
 */
const OPUS_FRAME_MS = (() => {
  const silk = [10, 20, 40, 60];
  const hybrid = [10, 20];
  const celt = [2.5, 5, 10, 20];
  return [
    ...silk, // 0-3 SILK NB
    ...silk, // 4-7 SILK MB
    ...silk, // 8-11 SILK WB
    ...hybrid, // 12-13 Hybrid SWB
    ...hybrid, // 14-15 Hybrid FB
    ...celt, // 16-19 CELT NB
    ...celt, // 20-23 CELT WB
    ...celt, // 24-27 CELT SWB
    ...celt, // 28-31 CELT FB
  ];
})();
|
||||
|
||||
/** Return the total number of 48 kHz PCM samples represented by an Opus packet. */
|
||||
function opusPacketSamples(pkt) {
|
||||
if (!pkt || pkt.length === 0) return 960; // default 20 ms
|
||||
const toc = pkt[0];
|
||||
const config = (toc >> 3) & 0x1f;
|
||||
const code = toc & 0x03;
|
||||
|
||||
const samplesPerFrame = ((OPUS_FRAME_MS[config] || 20) * 48000) / 1000;
|
||||
let frameCount;
|
||||
if (code <= 1) frameCount = code + 1;
|
||||
else if (code === 2) frameCount = 2;
|
||||
else frameCount = pkt.length >= 2 ? pkt[1] & 0x3f : 1;
|
||||
|
||||
return samplesPerFrame * frameCount;
|
||||
}
|
||||
|
||||
/**
 * Build the 19-byte "OpusHead" identification packet.
 *
 * @param {number} channels   channel count (byte 9)
 * @param {number} sampleRate input sample rate in Hz (uint32 LE at byte 12)
 * @param {number} preSkip    pre-skip value (uint16 LE at byte 10)
 * @returns {Uint8Array} the packet bytes
 */
function buildOpusHead(channels, sampleRate, preSkip) {
  const head = new Uint8Array(19);
  const view = new DataView(head.buffer);

  head.set(new TextEncoder().encode('OpusHead'), 0); // 8-byte magic
  view.setUint8(8, 1); // version
  view.setUint8(9, channels);
  view.setUint16(10, preSkip, true);
  view.setUint32(12, sampleRate, true);
  view.setInt16(16, 0, true); // output gain
  view.setUint8(18, 0); // channel mapping family

  return head;
}
|
||||
|
||||
/**
 * Build the "OpusTags" comment packet: the 8-byte magic, a length-prefixed
 * vendor string ('chatwoot') and a user-comment count of zero.
 *
 * @returns {Uint8Array} the packet bytes
 */
function buildOpusTags() {
  const encoder = new TextEncoder();
  const signature = encoder.encode('OpusTags');
  const vendor = encoder.encode('chatwoot');

  // Layout: signature | uint32 vendor length | vendor | uint32 comment count
  const vendorOffset = signature.length + 4;
  const tags = new Uint8Array(vendorOffset + vendor.length + 4);
  const view = new DataView(tags.buffer);

  tags.set(signature, 0);
  view.setUint32(signature.length, vendor.length, true);
  tags.set(vendor, vendorOffset);
  view.setUint32(vendorOffset + vendor.length, 0, true); // 0 user comments

  return tags;
}
|
||||
|
||||
// ======================== Public API ========================
|
||||
|
||||
const MAX_FRAMES_PER_PAGE = 50; // ~1 s at 20 ms/frame
|
||||
const MAX_SEGMENTS_PER_PAGE = 255;
|
||||
|
||||
/**
 * Remux a WebM/Opus blob into an OGG/Opus blob.
 * If the input is already OGG (starts with "OggS"), it is returned as-is.
 *
 * The output consists of an OpusHead page (BOS), an OpusTags page, and then
 * audio pages holding at most MAX_FRAMES_PER_PAGE packets and at most
 * MAX_SEGMENTS_PER_PAGE lacing segments each; the final audio page is
 * flagged EOS.
 *
 * @param {Blob} webmBlob
 * @returns {Promise<Blob>} OGG/Opus blob (type `audio/ogg`)
 * @throws {Error} when the WebM input contains no Opus frames
 */
export async function remuxWebmToOgg(webmBlob) {
  const buffer = await webmBlob.arrayBuffer();
  const bytes = new Uint8Array(buffer);

  // Already OGG? Return unchanged.
  if (
    bytes.length >= 4 &&
    bytes[0] === 0x4f &&
    bytes[1] === 0x67 &&
    bytes[2] === 0x67 &&
    bytes[3] === 0x53
  ) {
    return webmBlob;
  }

  const { channels, sampleRate, codecPrivate, frames } = parseWebM(buffer);
  if (frames.length === 0) {
    throw new Error('No Opus frames found in WebM input');
  }

  // Extract pre-skip from the WebM CodecPrivate (which IS the OpusHead).
  // Falls back to 312 when CodecPrivate is missing or not an OpusHead.
  let preSkip = 312;
  if (codecPrivate && codecPrivate.length >= 12) {
    const magic = new TextDecoder().decode(codecPrivate.slice(0, 8));
    if (magic === 'OpusHead') {
      preSkip = new DataView(
        codecPrivate.buffer,
        codecPrivate.byteOffset,
        codecPrivate.length
      ).getUint16(10, true);
    }
  }

  const serial = (Math.random() * 0x100000000) >>> 0;
  let pageSeq = 0;
  const pages = [];

  // Page 0 – OpusHead (BOS)
  pages.push(
    createOggPage(0x02, 0, serial, pageSeq, [
      buildOpusHead(channels, sampleRate, preSkip),
    ])
  );
  pageSeq += 1;

  // Page 1 – OpusTags
  pages.push(createOggPage(0x00, 0, serial, pageSeq, [buildOpusTags()]));
  pageSeq += 1;

  // Audio pages
  let granule = 0; // running 48 kHz sample total across all packets so far
  let idx = 0;

  while (idx < frames.length) {
    const packets = [];
    let segs = 0;

    while (idx < frames.length && packets.length < MAX_FRAMES_PER_PAGE) {
      const pkt = frames[idx];
      // A packet of N bytes occupies floor(N / 255) lacing values of 255 plus
      // one terminating value below 255 (0 when N is an exact multiple of
      // 255). Counting with ceil() would miss that terminator for such
      // packets and let a page exceed the one-byte segment-count limit.
      const pktSegs = Math.floor(pkt.length / 255) + 1;
      // Always accept the first packet of a page so the loop makes progress.
      if (segs + pktSegs > MAX_SEGMENTS_PER_PAGE && packets.length > 0) break;

      packets.push(pkt);
      segs += pktSegs;
      granule += opusPacketSamples(pkt);
      idx += 1;
    }

    const isLast = idx >= frames.length;
    pages.push(
      createOggPage(isLast ? 0x04 : 0x00, granule, serial, pageSeq, packets)
    );
    pageSeq += 1;
  }

  // Concatenate pages into a single buffer
  const total = pages.reduce((s, p) => s + p.length, 0);
  const out = new Uint8Array(total);
  let off = 0;
  pages.forEach(p => {
    out.set(p, off);
    off += p.length;
  });

  return new Blob([out], { type: 'audio/ogg' });
}
|
||||
@ -360,6 +360,9 @@ export default {
|
||||
return `draft-${this.conversationIdByRoute}-${this.replyType}`;
|
||||
},
|
||||
audioRecordFormat() {
|
||||
if (this.isAWhatsAppCloudChannel) {
|
||||
return AUDIO_FORMATS.OGG;
|
||||
}
|
||||
if (this.isAWhatsAppChannel || this.isATelegramChannel) {
|
||||
return AUDIO_FORMATS.MP3;
|
||||
}
|
||||
@ -1027,6 +1030,10 @@ export default {
|
||||
};
|
||||
return file && this.onFileUpload(autoRecordedFile);
|
||||
},
|
||||
onRecordError() {
|
||||
this.toggleAudioRecorder();
|
||||
useAlert(this.$t('CONVERSATION.REPLYBOX.AUDIO_CONVERSION_FAILED'));
|
||||
},
|
||||
toggleTyping(status) {
|
||||
const conversationId = this.currentChat.id;
|
||||
const isPrivate = this.isPrivate;
|
||||
@ -1094,6 +1101,13 @@ export default {
|
||||
sender: this.sender,
|
||||
};
|
||||
|
||||
if (attachment.isRecordedAudio) {
|
||||
attachmentPayload.isRecordedAudio = this.globalConfig
|
||||
.directUploadsEnabled
|
||||
? true
|
||||
: [attachment.resource.file.name];
|
||||
}
|
||||
|
||||
attachmentPayload = this.setReplyToInPayload(attachmentPayload);
|
||||
multipleMessagePayload.push(attachmentPayload);
|
||||
// For WhatsApp, only the first attachment gets a caption
|
||||
@ -1142,6 +1156,9 @@ export default {
|
||||
this.attachedFiles.forEach(attachment => {
|
||||
if (this.globalConfig.directUploadsEnabled) {
|
||||
messagePayload.files.push(attachment.blobSignedId);
|
||||
if (attachment.isRecordedAudio) {
|
||||
messagePayload.isRecordedAudio = true;
|
||||
}
|
||||
} else {
|
||||
messagePayload.files.push(attachment.resource.file);
|
||||
if (attachment.isRecordedAudio) {
|
||||
@ -1304,6 +1321,7 @@ export default {
|
||||
:audio-record-format="audioRecordFormat"
|
||||
@recorder-progress-changed="onRecordProgressChanged"
|
||||
@finish-record="onFinishRecorder"
|
||||
@record-error="onRecordError"
|
||||
@play="recordingAudioState = 'playing'"
|
||||
@pause="recordingAudioState = 'paused'"
|
||||
/>
|
||||
|
||||
@ -213,6 +213,7 @@
|
||||
"TIP_AUDIORECORDER_ICON": "Record audio",
|
||||
"TIP_AUDIORECORDER_PERMISSION": "Allow access to audio",
|
||||
"TIP_AUDIORECORDER_ERROR": "Could not open the audio",
|
||||
"AUDIO_CONVERSION_FAILED": "Audio conversion failed. Please try again.",
|
||||
"DRAG_DROP": "Drag and drop here to attach",
|
||||
"START_AUDIO_RECORDING": "Start audio recording",
|
||||
"STOP_AUDIO_RECORDING": "Stop audio recording",
|
||||
|
||||
@ -205,6 +205,7 @@
|
||||
"TIP_AUDIORECORDER_ICON": "Gravar áudio",
|
||||
"TIP_AUDIORECORDER_PERMISSION": "Permitir acesso ao áudio",
|
||||
"TIP_AUDIORECORDER_ERROR": "Não foi possível abrir o áudio",
|
||||
"AUDIO_CONVERSION_FAILED": "Falha na conversão do áudio. Tente novamente.",
|
||||
"DRAG_DROP": "Arraste e solte aqui para anexar",
|
||||
"START_AUDIO_RECORDING": "Iniciar gravação de áudio",
|
||||
"STOP_AUDIO_RECORDING": "Parar gravação de áudio",
|
||||
|
||||
78
app/services/audio/transcode_service.rb
Normal file
78
app/services/audio/transcode_service.rb
Normal file
@ -0,0 +1,78 @@
|
||||
# Transcodes the audio file of an attachment into one of SUPPORTED_FORMATS
# using FFmpeg and attaches the result in place of the original file.
class Audio::TranscodeService
  # Target format name => FFmpeg audio codec, output extension and content type.
  SUPPORTED_FORMATS = {
    'opus' => { codec: 'libopus', extension: 'ogg', content_type: 'audio/ogg' }.freeze
  }.freeze

  # @param attachment [Object] record exposing `file` (the attached audio) and `file_type=`
  # @param target_format [String, Symbol] key of SUPPORTED_FORMATS
  # @param source_file [#tempfile, #path, nil] optional local copy of the audio; when given
  #   it is read instead of opening the attachment's blob
  def initialize(attachment, target_format, source_file: nil)
    @attachment = attachment
    # Normalised so a Symbol such as :opus matches the String keys of SUPPORTED_FORMATS.
    @target_format = target_format.to_s
    @source_file = source_file
  end

  # Transcodes the attachment unless its content type already matches the target.
  #
  # @raise [CustomExceptions::Audio::UnsupportedFormatError] for an unknown target format
  # @raise [CustomExceptions::Audio::TranscodingError] when the input is invalid or FFmpeg fails
  def perform
    validate_format!
    return if already_in_target_format?

    transcode_attachment
  end

  private

  def already_in_target_format?
    content_type = @attachment.file.content_type
    return true if content_type == SUPPORTED_FORMATS[@target_format][:content_type]

    # Marcel may detect Opus-in-OGG as audio/opus; treat as already in target format
    # when transcoding to Opus to avoid unnecessary re-transcoding
    @target_format == 'opus' && content_type == 'audio/opus'
  end

  def validate_format!
    return if SUPPORTED_FORMATS.key?(@target_format)

    raise CustomExceptions::Audio::UnsupportedFormatError,
          "Unsupported transcode format: #{@target_format}. Supported: #{SUPPORTED_FORMATS.keys.join(', ')}"
  end

  def transcode_attachment
    format_config = SUPPORTED_FORMATS[@target_format]
    # Locals assigned below read as nil inside `ensure` if their assignment never ran.
    input_file = download_to_tempfile
    output_file = Tempfile.new(['transcoded', ".#{format_config[:extension]}"])
    movie = FFMPEG::Movie.new(input_file.path)
    raise CustomExceptions::Audio::TranscodingError, 'Invalid or unreadable audio file' unless movie.valid?

    movie.transcode(output_file.path, audio_codec: format_config[:codec], custom: %w[-vn -map_metadata -1])
    replace_attachment_file(output_file, format_config)
  rescue FFMPEG::Error => e
    raise CustomExceptions::Audio::TranscodingError, "FFmpeg transcoding failed: #{e.message}"
  ensure
    input_file&.close!
    output_file&.close!
  end

  # Copies the audio into a fresh binary Tempfile that keeps the original extension.
  # The caller owns the returned Tempfile and must close it.
  def download_to_tempfile
    tempfile = Tempfile.new(['original_audio', File.extname(@attachment.file.filename.to_s)])
    tempfile.binmode
    copy_source_into(tempfile)
    tempfile.rewind
    tempfile
  rescue StandardError
    # The caller never receives the Tempfile when the copy fails, so release it here.
    tempfile&.close!
    raise
  end

  # Reads from the local source file when one is usable, otherwise from the stored blob.
  def copy_source_into(tempfile)
    source_path = local_source_path
    if source_path
      IO.copy_stream(source_path, tempfile)
    else
      @attachment.file.blob.open { |file| IO.copy_stream(file, tempfile) }
    end
  end

  # Path of the locally available source file, or nil when none was provided.
  def local_source_path
    return unless @source_file
    return @source_file.tempfile.path if @source_file.respond_to?(:tempfile)

    @source_file.path if @source_file.respond_to?(:path)
  end

  # Attaches the transcoded output under the original base name with the new extension.
  def replace_attachment_file(output_file, format_config)
    filename = "#{File.basename(@attachment.file.filename.to_s, '.*')}.#{format_config[:extension]}"
    File.open(output_file.path, 'rb') do |file|
      @attachment.file.attach(
        io: file,
        filename: filename,
        content_type: format_config[:content_type]
      )
    end
    @attachment.file_type = :audio
  end
end
|
||||
@ -156,14 +156,12 @@ class Whatsapp::Providers::WhatsappCloudService < Whatsapp::Providers::BaseServi
|
||||
|
||||
def send_attachment_message(phone_number, message)
|
||||
attachment = message.attachments.first
|
||||
normalize_opus_content_type(attachment)
|
||||
type = %w[image audio video].include?(attachment.file_type) ? attachment.file_type : 'document'
|
||||
type_content = {
|
||||
'link': attachment.download_url
|
||||
}
|
||||
type_content = { 'link' => attachment.download_url }
|
||||
type_content['caption'] = message.outgoing_content unless %w[audio sticker].include?(type)
|
||||
type_content['filename'] = attachment.file.filename if type == 'document'
|
||||
# FIXME: This requires transcoding to opus/ogg.
|
||||
# type_content['voice'] = true if type == 'audio' && attachment.meta&.dig('is_recorded_audio')
|
||||
type_content['voice'] = true if voice_message?(type, attachment)
|
||||
response = HTTParty.post(
|
||||
"#{phone_id_path('v24.0')}/messages",
|
||||
headers: api_headers,
|
||||
@ -179,6 +177,25 @@ class Whatsapp::Providers::WhatsappCloudService < Whatsapp::Providers::BaseServi
|
||||
process_response(response, message)
|
||||
end
|
||||
|
||||
# Truthy only for an audio attachment that is flagged as recorded audio in its
# meta and whose file content type is audio/ogg.
def voice_message?(type, attachment)
  return false unless type == 'audio'

  recorded = attachment.meta&.dig('is_recorded_audio')
  recorded && attachment.file.content_type == 'audio/ogg'
end
|
||||
|
||||
# Marcel gem may re-detect OGG/Opus files as audio/opus after ActiveStorage
|
||||
# blob attachment, but WhatsApp Cloud API requires audio/ogg content type
|
||||
# for voice messages. Normalize so the download URL serves the correct
|
||||
# Content-Type header. No-op when the frontend already uploads as audio/ogg.
|
||||
def normalize_opus_content_type(attachment)
  file = attachment.file
  return unless file.attached?

  blob = file.blob
  # Only blobs typed audio/opus are rewritten; a failed update is logged, not raised.
  return unless blob.content_type == 'audio/opus' && !blob.update(content_type: 'audio/ogg')

  Rails.logger.error("Failed to normalize blob #{blob.id} content_type from audio/opus to audio/ogg")
end
|
||||
|
||||
def error_message(response)
|
||||
# https://developers.facebook.com/docs/whatsapp/cloud-api/support/error-codes/#sample-response
|
||||
response.parsed_response&.dig('error', 'message')
|
||||
|
||||
@ -128,6 +128,7 @@ RUN apk update && apk add --no-cache \
|
||||
imagemagick \
|
||||
git \
|
||||
vips \
|
||||
ffmpeg \
|
||||
&& gem install bundler -v "$BUNDLER_VERSION"
|
||||
|
||||
COPY --from=node /usr/local/bin/node /usr/local/bin/
|
||||
|
||||
13
lib/custom_exceptions/audio.rb
Normal file
13
lib/custom_exceptions/audio.rb
Normal file
@ -0,0 +1,13 @@
|
||||
# Error classes raised by Audio::TranscodeService.
module CustomExceptions::Audio
  # Raised when the requested transcode target format is not a supported one.
  class UnsupportedFormatError < CustomExceptions::Base
    # Uses @data as the error message.
    # NOTE(review): presumably @data is the value passed to `raise`, stored by
    # CustomExceptions::Base — confirm against the base class.
    def message
      @data
    end
  end

  # Raised when the input audio is invalid/unreadable or FFmpeg fails to transcode it.
  class TranscodingError < CustomExceptions::Base
    # Uses @data as the error message.
    # NOTE(review): presumably @data is the value passed to `raise`, stored by
    # CustomExceptions::Base — confirm against the base class.
    def message
      @data
    end
  end
end
|
||||
@ -186,6 +186,50 @@ describe Messages::MessageBuilder do
|
||||
})
|
||||
end
|
||||
|
||||
context 'when transcode_audio is set' do
|
||||
let(:params) do
|
||||
ActionController::Parameters.new({
|
||||
content: 'test',
|
||||
transcode_audio: 'opus',
|
||||
attachments: [Rack::Test::UploadedFile.new('spec/assets/sample.mp3', 'audio/mpeg')]
|
||||
})
|
||||
end
|
||||
|
||||
it 'transcodes audio attachment and sets is_recorded_audio metadata' do
|
||||
service_instance = instance_double(Audio::TranscodeService)
|
||||
allow(Audio::TranscodeService).to receive(:new).and_return(service_instance)
|
||||
allow(service_instance).to receive(:perform)
|
||||
|
||||
message = message_builder
|
||||
|
||||
expect(Audio::TranscodeService).to have_received(:new)
|
||||
expect(service_instance).to have_received(:perform)
|
||||
expect(message.attachments.first.meta).to include('is_recorded_audio' => true)
|
||||
end
|
||||
|
||||
it 'does not transcode non-audio attachments' do
|
||||
allow(Audio::TranscodeService).to receive(:new)
|
||||
params[:attachments] = [Rack::Test::UploadedFile.new('spec/assets/avatar.png', 'image/png')]
|
||||
|
||||
message = message_builder
|
||||
|
||||
expect(Audio::TranscodeService).not_to have_received(:new)
|
||||
expect(message.attachments.first.file_type).to eq 'image'
|
||||
end
|
||||
end
|
||||
|
||||
context 'when transcode_audio is not set' do
|
||||
it 'does not invoke transcoding service' do
|
||||
allow(Audio::TranscodeService).to receive(:new)
|
||||
params[:attachments] = [Rack::Test::UploadedFile.new('spec/assets/sample.mp3', 'audio/mpeg')]
|
||||
|
||||
message = message_builder
|
||||
|
||||
expect(Audio::TranscodeService).not_to have_received(:new)
|
||||
expect(message.attachments.first.file_type).to eq 'audio'
|
||||
end
|
||||
end
|
||||
|
||||
context 'when DIRECT_UPLOAD_ENABLED' do
|
||||
let(:params) do
|
||||
ActionController::Parameters.new({
|
||||
|
||||
106
spec/services/audio/transcode_service_spec.rb
Normal file
106
spec/services/audio/transcode_service_spec.rb
Normal file
@ -0,0 +1,106 @@
|
||||
require 'rails_helper'
|
||||
|
||||
RSpec.describe Audio::TranscodeService do
  let(:message) { create(:message) }
  let(:attachment) do
    message.attachments.new(account_id: message.account_id, file_type: :audio).tap do |record|
      record.file.attach(
        io: Rails.root.join('spec/assets/sample.mp3').open,
        filename: 'sample.mp3',
        content_type: 'audio/mpeg'
      )
      record.save!
    end
  end

  # Runs the service and returns the StandardError it raised, or nil when it completed.
  def perform_and_capture_error(*args, **options)
    described_class.new(*args, **options).perform
    nil
  rescue StandardError => e
    e
  end

  # Makes FFMPEG::Movie.new return a double with the given validity and returns that double.
  def stub_ffmpeg_movie(valid: true)
    instance_double(FFMPEG::Movie, valid?: valid).tap do |movie|
      allow(FFMPEG::Movie).to receive(:new).and_return(movie)
    end
  end

  # Makes the movie double's #transcode write placeholder bytes to the output path.
  def stub_successful_transcode(movie)
    allow(movie).to receive(:transcode) do |output_path, _options|
      File.write(output_path, 'fake_opus_data')
    end
  end

  describe '#perform' do
    context 'with unsupported format' do
      it 'raises UnsupportedFormatError' do
        error = perform_and_capture_error(attachment, 'aac')

        expect(error).not_to be_nil
        expect(error.class.name).to eq('CustomExceptions::Audio::UnsupportedFormatError')
        expect(error.message).to match(/Unsupported transcode format: aac/)
      end
    end

    context 'with opus format' do
      it 'skips transcoding when file is already in target format' do
        ogg_attachment = message.attachments.new(account_id: message.account_id, file_type: :audio)
        ogg_attachment.file.attach(io: StringIO.new('ogg_data'), filename: 'recording.ogg', content_type: 'audio/ogg')
        ogg_attachment.save!

        allow(FFMPEG::Movie).to receive(:new)

        described_class.new(ogg_attachment, 'opus').perform

        expect(FFMPEG::Movie).not_to have_received(:new)
        expect(ogg_attachment.file.content_type).to eq('audio/ogg')
      end

      it 'transcodes audio to ogg/opus format' do
        stub_successful_transcode(stub_ffmpeg_movie)

        described_class.new(attachment, 'opus').perform

        expect(attachment.file.filename.to_s).to eq('sample.ogg')
        expect(attachment.file.content_type).to eq('audio/ogg')
        expect(attachment.file_type).to eq('audio')
      end

      it 'transcodes using source_file when provided' do
        uploaded_file = Rack::Test::UploadedFile.new(Rails.root.join('spec/assets/sample.mp3').to_s, 'audio/mpeg')
        stub_successful_transcode(stub_ffmpeg_movie)

        described_class.new(attachment, 'opus', source_file: uploaded_file).perform

        expect(attachment.file.filename.to_s).to eq('sample.ogg')
        expect(attachment.file.content_type).to eq('audio/ogg')
        expect(attachment.file_type).to eq('audio')
      end

      it 'raises TranscodingError when the audio file is invalid' do
        stub_ffmpeg_movie(valid: false)

        error = perform_and_capture_error(attachment, 'opus')

        expect(error).not_to be_nil
        expect(error.class.name).to eq('CustomExceptions::Audio::TranscodingError')
        expect(error.message).to match(/Invalid or unreadable audio file/)
      end

      it 'raises TranscodingError when FFmpeg fails' do
        movie = stub_ffmpeg_movie
        allow(movie).to receive(:transcode).and_raise(FFMPEG::Error, 'encoding failed')

        error = perform_and_capture_error(attachment, 'opus')

        expect(error).not_to be_nil
        expect(error.class.name).to eq('CustomExceptions::Audio::TranscodingError')
        expect(error.message).to match(/FFmpeg transcoding failed/)
      end
    end
  end
end
|
||||
@ -109,23 +109,43 @@ describe Whatsapp::Providers::WhatsappCloudService do
|
||||
expect(service.send_message('+123456789', message)).to eq 'message_id'
|
||||
end
|
||||
|
||||
# FIXME: This requires transcoding to opus/ogg.
|
||||
# it 'calls message endpoints with voice flag for recorded audio attachment' do
|
||||
# attachment = message.attachments.new(account_id: message.account_id, file_type: :audio, meta: { 'is_recorded_audio' => true })
|
||||
# attachment.file.attach(io: Rails.root.join('spec/assets/sample.mp3').open, filename: 'sample.mp3', content_type: 'audio/mpeg')
|
||||
it 'does not send voice flag for recorded audio in non-ogg format' do
|
||||
attachment = message.attachments.new(account_id: message.account_id, file_type: :audio, meta: { 'is_recorded_audio' => true })
|
||||
attachment.file.attach(io: Rails.root.join('spec/assets/sample.mp3').open, filename: 'sample.mp3', content_type: 'audio/mpeg')
|
||||
|
||||
# stub_request(:post, 'https://graph.facebook.com/v24.0/123456789/messages')
|
||||
# .with(
|
||||
# body: hash_including({
|
||||
# messaging_product: 'whatsapp',
|
||||
# to: '+123456789',
|
||||
# type: 'audio',
|
||||
# audio: WebMock::API.hash_including({ link: anything, voice: true })
|
||||
# })
|
||||
# )
|
||||
# .to_return(status: 200, body: whatsapp_response.to_json, headers: response_headers)
|
||||
# expect(service.send_message('+123456789', message)).to eq 'message_id'
|
||||
# end
|
||||
stub_request(:post, 'https://graph.facebook.com/v24.0/123456789/messages')
|
||||
.with(
|
||||
body: hash_including({
|
||||
messaging_product: 'whatsapp',
|
||||
to: '+123456789',
|
||||
type: 'audio',
|
||||
audio: WebMock::API.hash_including({ link: anything })
|
||||
})
|
||||
)
|
||||
.to_return(status: 200, body: whatsapp_response.to_json, headers: response_headers)
|
||||
|
||||
# Ensure voice flag is NOT present for non-ogg audio
|
||||
expect(service.send_message('+123456789', message)).to eq 'message_id'
|
||||
expect(WebMock).not_to(have_requested(:post, 'https://graph.facebook.com/v24.0/123456789/messages')
|
||||
.with { |req| JSON.parse(req.body).dig('audio', 'voice') })
|
||||
end
|
||||
|
||||
it 'sends voice flag for recorded audio in ogg format' do
|
||||
attachment = message.attachments.new(account_id: message.account_id, file_type: :audio, meta: { 'is_recorded_audio' => true })
|
||||
attachment.file.attach(io: Rails.root.join('spec/assets/sample.ogg').open, filename: 'sample.ogg', content_type: 'audio/ogg')
|
||||
|
||||
stub_request(:post, 'https://graph.facebook.com/v24.0/123456789/messages')
|
||||
.with(
|
||||
body: hash_including({
|
||||
messaging_product: 'whatsapp',
|
||||
to: '+123456789',
|
||||
type: 'audio',
|
||||
audio: WebMock::API.hash_including({ link: anything, voice: true })
|
||||
})
|
||||
)
|
||||
.to_return(status: 200, body: whatsapp_response.to_json, headers: response_headers)
|
||||
expect(service.send_message('+123456789', message)).to eq 'message_id'
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user