Early Beta — The Web SDK is in early beta. APIs may change between releases.
Overview
Voice Activity Detection (VAD) determines when speech is present in an audio stream. It’s essential for building voice interfaces that automatically detect when users start and stop speaking. The Web SDK uses Silero VAD compiled to WebAssembly via sherpa-onnx.
Package Imports
VAD classes come from @runanywhere/web-onnx, while model management is in @runanywhere/web:
import { ModelManager, ModelCategory, RunAnywhere, LLMFramework } from '@runanywhere/web'
import { VAD, SpeechActivity, AudioCapture } from '@runanywhere/web-onnx'
Basic Usage
import { ModelManager } from '@runanywhere/web'
import { VAD, SpeechActivity, AudioCapture } from '@runanywhere/web-onnx'

// Make sure the VAD model is available locally, then load it.
// `coexist: true` lets it stay loaded alongside other models (e.g. an STT model).
await ModelManager.downloadModel('silero-vad-v5')
await ModelManager.loadModel('silero-vad-v5', { coexist: true })

// Subscribe to speech activity; the callback receives Started/Ongoing/Ended events.
const unsubscribe = VAD.onSpeechActivity((activity) => {
  if (activity === SpeechActivity.Ended) {
    // A completed segment is only available after speech has ended.
    const segment = VAD.popSpeechSegment()
    if (segment) {
      console.log(`Speech segment: ${segment.samples.length} samples`)
      // Process the speech segment (e.g., send to STT)
    }
  }
})

// Capture microphone audio at 16 kHz and feed each chunk into the detector.
const mic = new AudioCapture({ sampleRate: 16000 })
await mic.start(
  (chunk) => {
    VAD.processSamples(chunk)
  },
  (level) => {
    console.log('Audio level:', level)
  }
)

// Tear down: stop capture and remove the activity listener.
mic.stop()
unsubscribe()
Model Registration
Register the Silero VAD model in your model catalog:
import { RunAnywhere, ModelCategory, LLMFramework } from '@runanywhere/web'

// Catalog entry for Silero VAD. Registering it makes the 'silero-vad-v5'
// id resolvable by ModelManager.downloadModel / loadModel.
const sileroVadModel = {
  id: 'silero-vad-v5',
  name: 'Silero VAD v5',
  // Direct URL to the ONNX weights on Hugging Face.
  url: 'https://huggingface.co/runanywhere/silero-vad-v5/resolve/main/silero_vad.onnx',
  files: ['silero_vad.onnx'],
  framework: LLMFramework.ONNX,
  modality: ModelCategory.Audio,
  // 5,000,000 bytes (~5 MB) — the model is small.
  memoryRequirement: 5_000_000,
}

RunAnywhere.registerModels([sileroVadModel])
API Reference
VAD.processSamples
Feed a chunk of audio samples to the detector and check it for voice activity.
VAD.processSamples(samples: Float32Array): boolean
Returns true if speech is detected in the current frame.
VAD.onSpeechActivity
Subscribe to speech activity events.
VAD.onSpeechActivity(
callback: (activity: SpeechActivity) => void
): () => void // returns unsubscribe function
SpeechActivity
enum SpeechActivity {
Started, // Speech began
Ended, // Speech ended
Ongoing, // Speech continues
}
VAD.popSpeechSegment
Retrieve (and remove) the most recent completed speech segment, if any.
VAD.popSpeechSegment(): SpeechSegment | null
interface SpeechSegment {
/** Start time in seconds */
startTime: number
/** PCM audio samples of the speech */
samples: Float32Array
}
Other Methods
// Reset VAD state between sessions
VAD.reset()
// Flush any remaining audio
VAD.flush()
// Release all resources
VAD.cleanup()
Examples
Live Microphone VAD
import { ModelManager } from '@runanywhere/web'
import { VAD, SpeechActivity, AudioCapture } from '@runanywhere/web-onnx'

// Download (if needed) and load the VAD model; `coexist: true` keeps it
// loaded alongside any other models.
await ModelManager.downloadModel('silero-vad-v5')
await ModelManager.loadModel('silero-vad-v5', { coexist: true })

const mic = new AudioCapture({ sampleRate: 16000 })

// Clear any detector state left over from a previous session.
VAD.reset()

const unsubscribe = VAD.onSpeechActivity((activity) => {
  if (activity === SpeechActivity.Ended) {
    const segment = VAD.popSpeechSegment()
    // Ignore very short segments — 1600 samples is ~100 ms at 16 kHz —
    // to filter out clicks and background noise.
    if (segment && segment.samples.length > 1600) {
      processAudio(segment.samples) // your handler, e.g. send to STT
    }
  }
})

await mic.start(
  (chunk) => {
    VAD.processSamples(chunk)
  },
  (level) => {
    // Drive a simple level meter; `level` appears to be in [0, 1] — confirm in SDK docs.
    document.getElementById('level')!.style.width = `${level * 100}%`
  }
)

// Tear down when finished.
mic.stop()
unsubscribe()
VAD + STT (Record and Transcribe)
import { ModelManager } from '@runanywhere/web'
import { VAD, SpeechActivity, AudioCapture } from '@runanywhere/web-onnx'

// Load both the VAD and STT models with `coexist: true` so they can be
// resident at the same time. (Download them first if they are not cached.)
await ModelManager.loadModel('silero-vad-v5', { coexist: true })
await ModelManager.loadModel('sherpa-onnx-whisper-tiny.en', { coexist: true })

const mic = new AudioCapture({ sampleRate: 16000 })
VAD.reset()

// Keep the unsubscribe handle so the listener can be removed on teardown.
const unsubscribe = VAD.onSpeechActivity((activity) => {
  if (activity === SpeechActivity.Ended) {
    const segment = VAD.popSpeechSegment()
    // Skip segments shorter than ~100 ms (1600 samples at 16 kHz).
    if (segment && segment.samples.length > 1600) {
      // The VoicePipeline handles STT automatically — see voice-agent docs
      console.log('Got speech segment:', segment.samples.length, 'samples')
    }
  }
})

await mic.start(
  (chunk) => {
    VAD.processSamples(chunk)
  },
  () => {} // no level meter in this example
)

// When done: stop the mic and remove the listener.
mic.stop()
unsubscribe()
React Component
import { useState, useCallback, useRef, useEffect } from 'react'
import { ModelManager } from '@runanywhere/web'
import { VAD, SpeechActivity, AudioCapture } from '@runanywhere/web-onnx'

/**
 * Microphone VAD indicator: renders a circle that turns green and scales
 * with the audio level while speech is detected. Loads the VAD model on
 * demand when listening starts.
 */
export function VoiceDetector() {
  const [isSpeaking, setIsSpeaking] = useState(false)
  const [isListening, setIsListening] = useState(false)
  const [audioLevel, setAudioLevel] = useState(0)

  // Kept in refs (not state) so teardown can reach them without re-renders.
  const micRef = useRef<AudioCapture | null>(null)
  const unsubRef = useRef<(() => void) | null>(null)

  // Stop capture and drop the listener if the component unmounts mid-session.
  useEffect(() => {
    return () => {
      micRef.current?.stop()
      unsubRef.current?.()
    }
  }, [])

  const startListening = useCallback(async () => {
    // Ensure the VAD model is downloaded and loaded before feeding audio.
    await ModelManager.downloadModel('silero-vad-v5')
    await ModelManager.loadModel('silero-vad-v5', { coexist: true })

    const mic = new AudioCapture({ sampleRate: 16000 })
    micRef.current = mic

    // Clear any detector state from a previous session.
    VAD.reset()

    unsubRef.current = VAD.onSpeechActivity((activity) => {
      setIsSpeaking(activity === SpeechActivity.Started || activity === SpeechActivity.Ongoing)
      if (activity === SpeechActivity.Ended) {
        const segment = VAD.popSpeechSegment()
        if (segment) {
          console.log('Speech segment captured:', segment.samples.length, 'samples')
        }
      }
    })

    await mic.start(
      (chunk) => {
        VAD.processSamples(chunk)
      },
      (level) => {
        setAudioLevel(level)
      }
    )
    setIsListening(true)
  }, [])

  const stopListening = useCallback(() => {
    micRef.current?.stop()
    unsubRef.current?.()
    // Null the refs so the unmount cleanup doesn't stop/unsubscribe twice.
    micRef.current = null
    unsubRef.current = null
    VAD.reset()
    setIsListening(false)
    setIsSpeaking(false)
    setAudioLevel(0)
  }, [])

  return (
    <div style={{ textAlign: 'center', padding: 32 }}>
      {/* Indicator: green while speech is detected, scaled by audio level. */}
      <div
        style={{
          width: 80,
          height: 80,
          borderRadius: '50%',
          backgroundColor: isSpeaking ? '#4caf50' : '#ddd',
          margin: '0 auto 16px',
          transition: 'background-color 0.2s',
          transform: `scale(${1 + audioLevel * 0.3})`,
        }}
      />
      <p>{isSpeaking ? 'Speaking...' : isListening ? 'Listening...' : 'Ready'}</p>
      <button onClick={isListening ? stopListening : startListening}>
        {isListening ? 'Stop' : 'Start Listening'}
      </button>
    </div>
  )
}
Error Handling
import { ModelManager, SDKError } from '@runanywhere/web'

try {
  await ModelManager.loadModel('silero-vad-v5', { coexist: true })
} catch (err) {
  if (err instanceof SDKError) {
    // SDK-reported failure (e.g. the model could not be loaded).
    console.error('VAD error:', err.message)
  } else {
    // Unexpected error — rethrow rather than swallowing it silently.
    throw err
  }
}