// Voice Clone — WebAudio PCM capture + WAV encode + upload. // // Why not MediaRecorder? On iOS WebKit (Safari, DuckDuckGo, any iOS browser) // MediaRecorder is unreliable: it often emits zero chunks even when stop() + // requestData() are called in the documented order. WebAudio's ScriptProcessor // has worked on iOS since iOS 6 and gives us raw Float32 samples we can // encode to WAV ourselves. Payload is ~2× larger than m4a for a 60s clip // (~2 MB vs ~1 MB), which is fine for a one-shot clone upload. const { useState: useS_V, useEffect: useE_V, useRef: useR_V } = React; const SCRIPT = `I am not who I was. I am building who I will be. Every day I show up. Every day I do the work. The weight comes off. The mind clears. I am steady.`; // Pack mono Float32 PCM into a 16-bit little-endian WAV blob. // iOS WebKit rejects single-part Blobs built from a DataView-written ArrayBuffer // (returns size=0 even when the buffer is full). Workaround: build the header // and the PCM data as SEPARATE typed arrays, each with its own fresh buffer, // and pass both to Blob as a multi-part body. Safari handles that path // correctly — probably because it allocates its own internal storage for // multi-part Blobs rather than aliasing the caller's memory. function encodeWAV(samples, sampleRate) { const dataBytes = samples.length * 2; // Header — 44 bytes in its own Uint8Array. const header = new Uint8Array(44); const hv = new DataView(header.buffer); const put4 = (off, s) => { for (let i = 0; i < 4; i++) header[off + i] = s.charCodeAt(i); }; put4(0, "RIFF"); hv.setUint32(4, 36 + dataBytes, true); put4(8, "WAVE"); put4(12, "fmt "); hv.setUint32(16, 16, true); hv.setUint16(20, 1, true); // PCM hv.setUint16(22, 1, true); // mono hv.setUint32(24, sampleRate, true); hv.setUint32(28, sampleRate * 2, true); // byte rate hv.setUint16(32, 2, true); // block align hv.setUint16(34, 16, true); // bits/sample put4(36, "data"); hv.setUint32(40, dataBytes, true); // PCM — Int16Array has its own fresh ArrayBuffer, no DataView aliasing. const pcm = new Int16Array(samples.length); for (let i = 0; i < samples.length; i++) { const s = Math.max(-1, Math.min(1, samples[i])); pcm[i] = s < 0 ? s * 0x8000 : s * 0x7fff; } return new Blob([header, pcm], { type: "audio/wav" }); } function VoiceCloneScreen({ onDone, onSkip }) { const [stage, setStage] = useS_V("idle"); // idle | armed | recorded | uploading | error const [elapsed, setElapsed] = useS_V(0); const [levels, setLevels] = useS_V(() => new Array(28).fill(3)); const [errorMsg, setErrorMsg] = useS_V(""); // hasPreview gates the Preview button. True when we have a playable local // blob URL — which is "always" for file uploads (native File) and "only if // Blob construction worked" for PCM recordings. const [hasPreview, setHasPreview] = useS_V(false); const targetMs = 60000; const rafRef = useR_V(null); const startRef = useR_V(null); const mediaRef = useR_V(null); const chunksRef = useR_V([]); const blobRef = useR_V(null); // populated for file-upload path only const pcmRef = useR_V(null); // populated for recorded-PCM path only const pcmSampleRateRef = useR_V(0); const previewUrlRef = useR_V(null); const audioCtxRef = useR_V(null); const analyserRef = useR_V(null); const processorRef = useR_V(null); const silentGainRef = useR_V(null); const sourceRef = useR_V(null); const sampleRateRef = useR_V(0); const processCountRef = useR_V(0); const ctxStateRef = useR_V(""); const trackCountRef = useR_V(0); const fileRef = useR_V(null); const uploadNameRef = useR_V("voice.wav"); useE_V(() => { if (stage !== "armed") return; startRef.current = performance.now(); const loop = () => { const e = performance.now() - startRef.current; setElapsed(e); const analyser = analyserRef.current; if (analyser) { const buf = new Uint8Array(analyser.frequencyBinCount); analyser.getByteFrequencyData(buf); let sum = 0; for (let i = 0; i < buf.length; i++) sum += buf[i]; const avg = sum / buf.length; setLevels(prev => { const next = prev.slice(1); next.push(Math.max(3, 3 + (avg / 255) * 43)); return next; }); } else { setLevels(prev => { const next = prev.slice(1); next.push(3 + Math.random() * 10); return next; }); } if (e >= targetMs) { stopRecording(); return; } rafRef.current = requestAnimationFrame(loop); }; rafRef.current = requestAnimationFrame(loop); return () => cancelAnimationFrame(rafRef.current); }, [stage]); useE_V(() => () => cleanup(), []); const teardownAudio = () => { const p = processorRef.current; if (p) { try { p.disconnect(); } catch {} p.onaudioprocess = null; processorRef.current = null; } if (silentGainRef.current) { try { silentGainRef.current.disconnect(); } catch {} silentGainRef.current = null; } if (sourceRef.current) { try { sourceRef.current.disconnect(); } catch {} sourceRef.current = null; } if (mediaRef.current) { mediaRef.current.getTracks().forEach(t => t.stop()); mediaRef.current = null; } if (audioCtxRef.current) { try { audioCtxRef.current.close(); } catch {} audioCtxRef.current = null; } analyserRef.current = null; }; const cleanup = () => { cancelAnimationFrame(rafRef.current); teardownAudio(); if (previewUrlRef.current) { URL.revokeObjectURL(previewUrlRef.current); previewUrlRef.current = null; } }; const startRecording = async () => { setErrorMsg(""); try { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); mediaRef.current = stream; trackCountRef.current = stream.getAudioTracks().length; const AC = window.AudioContext || window.webkitAudioContext; const ctx = new AC(); // iOS suspends AudioContexts created off user gesture until resume() is called. // This method runs inside an onClick handler, so resume is safe. if (ctx.state === "suspended" && ctx.resume) { try { await ctx.resume(); } catch {} } ctxStateRef.current = ctx.state; audioCtxRef.current = ctx; sampleRateRef.current = ctx.sampleRate; processCountRef.current = 0; const source = ctx.createMediaStreamSource(stream); sourceRef.current = source; const analyser = ctx.createAnalyser(); analyser.fftSize = 64; source.connect(analyser); analyserRef.current = analyser; // ScriptProcessor is deprecated but works on every browser including iOS // WebKit. AudioWorklet is the modern replacement but requires async module // loading — overkill for a one-shot clone. Buffer 4096 samples = ~93ms @ 44.1kHz. const processor = ctx.createScriptProcessor(4096, 1, 1); chunksRef.current = []; processor.onaudioprocess = (e) => { processCountRef.current++; const input = e.inputBuffer.getChannelData(0); // Must copy — the inputBuffer is reused by the audio graph on the next tick. chunksRef.current.push(new Float32Array(input)); }; source.connect(processor); // ScriptProcessor needs to be connected to destination on some browsers // for onaudioprocess to fire. Route through a silent gain to avoid feedback. const silent = ctx.createGain(); silent.gain.value = 0; processor.connect(silent); silent.connect(ctx.destination); processorRef.current = processor; silentGainRef.current = silent; setStage("armed"); } catch (e) { console.warn("mic start failed:", e); setErrorMsg(e.message || "Microphone blocked"); setStage("error"); } }; const stopRecording = () => { cancelAnimationFrame(rafRef.current); const chunks = chunksRef.current; const totalLen = chunks.reduce((acc, c) => acc + c.length, 0); const sampleRate = sampleRateRef.current || 44100; // Stop PCM capture before we touch the arrays. const p = processorRef.current; if (p) { try { p.disconnect(); } catch {} p.onaudioprocess = null; processorRef.current = null; } // Max sample magnitude tells us whether the mic produced actual audio or // just silent buffers. iOS WebKit sometimes fires onaudioprocess with all- // zeros even when the track is "live" — that presents as a valid-looking // capture but an unusable one. We check both totalLen and peak amplitude. let peak = 0; for (const c of chunks) { for (let i = 0; i < c.length; i++) { const v = Math.abs(c[i]); if (v > peak) peak = v; } } const diagBase = `v14 ev=${processCountRef.current} ctx=${ctxStateRef.current || "?"} tr=${trackCountRef.current} sr=${sampleRate} len=${totalLen} peak=${peak.toFixed(3)}`; if (!totalLen || peak < 0.002) { teardownAudio(); setErrorMsg(`Mic returned silence [${diagBase}] — use Upload below`); setStage("error"); return; } // Merge Float32 chunks, then convert to mono 16-bit LE PCM. The Int16Array // is what we upload — its `.buffer` ships directly via fetch(body: ArrayBuffer), // which bypasses Blob construction entirely. Server wraps in WAV on receipt. const samples = new Float32Array(totalLen); let offset = 0; for (const c of chunks) { samples.set(c, offset); offset += c.length; } const pcm = new Int16Array(totalLen); for (let i = 0; i < totalLen; i++) { const s = Math.max(-1, Math.min(1, samples[i])); pcm[i] = s < 0 ? s * 0x8000 : s * 0x7fff; } teardownAudio(); pcmRef.current = pcm; pcmSampleRateRef.current = sampleRate; blobRef.current = null; uploadNameRef.current = "voice.wav"; // Best-effort local preview. On iOS WebKit DDG this returns a broken blob // (size === undefined), so we fall back to no-preview — upload still works // because it takes the raw PCM path, not the blob path. if (previewUrlRef.current) { URL.revokeObjectURL(previewUrlRef.current); previewUrlRef.current = null; } try { const wav = encodeWAV(samples, sampleRate); if (wav && wav.size > 0) { previewUrlRef.current = URL.createObjectURL(wav); setHasPreview(true); } else { setHasPreview(false); } } catch { setHasPreview(false); } setStage("recorded"); }; // Native-picker escape hatch. iOS WebKit in-browser recording (MediaRecorder, // WebAudio PCM) has a history of silently dropping audio — when that happens, // the user can record in Voice Memos or their camera app and upload the file // here. `capture="user"` offers the native recorder directly on supported // browsers; if it's not supported, the file picker still works. const onPickFile = (e) => { const f = e.target.files && e.target.files[0]; e.target.value = ""; // allow re-picking the same file later if (!f) return; setErrorMsg(""); // File path: the File object is a native Blob (not client-constructed), // so preview works and we route through the old multipart clone endpoint. pcmRef.current = null; pcmSampleRateRef.current = 0; blobRef.current = f; uploadNameRef.current = f.name || "voice.m4a"; if (previewUrlRef.current) URL.revokeObjectURL(previewUrlRef.current); previewUrlRef.current = URL.createObjectURL(f); setHasPreview(true); // We don't know true duration for an arbitrary upload — show the file size // worth of "captured" by leaving elapsed untouched; the UX copy handles it. setStage("recorded"); }; const resetForReRecord = () => { blobRef.current = null; pcmRef.current = null; pcmSampleRateRef.current = 0; if (previewUrlRef.current) { URL.revokeObjectURL(previewUrlRef.current); previewUrlRef.current = null; } setHasPreview(false); setElapsed(0); setLevels(new Array(28).fill(3)); setStage("idle"); }; const preview = () => { if (!previewUrlRef.current) return; const a = new Audio(previewUrlRef.current); a.play().catch(err => console.warn("preview play failed", err)); }; const upload = async () => { // Two upload paths: PCM (in-app recording, iOS-safe raw bytes) or File // (native upload via multipart). PCM wins if both happen to be set. if (!pcmRef.current && !blobRef.current) return; setStage("uploading"); try { if (pcmRef.current) { await api.cloneVoicePCM(pcmRef.current.buffer, pcmSampleRateRef.current || 44100); } else { await api.cloneVoice(blobRef.current, uploadNameRef.current || "voice.wav"); } cleanup(); onDone(); } catch (e) { console.warn("upload failed:", e); setErrorMsg(e.message || "Upload failed"); setStage("recorded"); } }; const toggleRecord = () => { if (stage === "idle" || stage === "error") startRecording(); else if (stage === "armed") stopRecording(); else if (stage === "recorded") resetForReRecord(); }; const fmt = (ms) => { const s = Math.floor(ms / 1000); return `0:${String(s).padStart(2,"0")}`; }; return (
RECORD YOUR
FUTURE SELF.
Your own voice, saying these words back to you, is 2× more powerful than a stranger’s. 30–60 seconds is enough.
Read out loud, naturally {SCRIPT}
{levels.map((h, i) => (
))}
{stage === "idle" && "Tap to record"} {stage === "armed" && {fmt(elapsed)} / 1:00} {stage === "recorded" && {elapsed > 0 ? `${fmt(elapsed)} captured` : "Recording selected"}} {stage === "uploading" && "Uploading…"} {stage === "error" && {errorMsg || "Mic unavailable"}}
{/* No `capture` attribute — on iOS, `capture="user"` means the FRONT-FACING CAMERA for video, not the mic. That's why this button used to open camera. Without `capture`, iOS presents the Files picker, which lets the user navigate to Voice Memos (On My iPhone → Voice Memos) and pick a saved recording. */}
{stage === "recorded" && (
{hasPreview && }
{errorMsg &&
{errorMsg}
}
)}
{(stage === "idle" || stage === "armed" || stage === "error") && ( )}
voice build v14-serverwav
); } Object.assign(window, { VoiceCloneScreen });