Documentation Index
Fetch the complete documentation index at: https://docs.runanywhere.ai/llms.txt
Use this file to discover all available pages before exploring further.
The Voice Agent orchestrates the complete voice interaction pipeline: VAD → STT → LLM → TTS. Build hands-free voice assistants with a single API.
Overview
The pipeline automatically:
- Listens for speech using VAD
- Transcribes what the user says
- Generates an AI response
- Speaks the response back
Prerequisites
Load all required models before starting a voice session:
// Load the speech-to-text model, the LLM and the TTS voice, but only when the
// voice agent does not already report itself as ready.
final agentReady = RunAnywhere.isVoiceAgentReady;
if (!agentReady) {
  await RunAnywhere.loadSTTModel('sherpa-onnx-whisper-tiny.en');
  await RunAnywhere.loadModel('smollm2-360m-q8_0');
  await RunAnywhere.loadTTSVoice('vits-piper-en_US-lessac-medium');
}
Basic Usage
// Open a voice session; the returned handle exposes the event stream and the
// stop control used below.
final session = await RunAnywhere.startVoiceSession();

// Print a line for each event of interest as it arrives.
session.events.listen((event) {
  if (event case VoiceSessionListening(:final audioLevel)) {
    print('Listening... Level: $audioLevel');
  } else if (event is VoiceSessionSpeechStarted) {
    print('Speech detected!');
  } else if (event case VoiceSessionTranscribed(:final text)) {
    print('User: $text');
  } else if (event case VoiceSessionResponded(:final text)) {
    print('AI: $text');
  } else if (event
      case VoiceSessionTurnCompleted(:final transcript, :final response)) {
    print('Turn complete - User: $transcript, AI: $response');
  }
  // Any other event type is deliberately ignored.
});

// Stop the session once the conversation is over.
session.stop();
VoiceSessionConfig
Configure voice session behavior:
// Describe the session behaviour first, then hand it to startVoiceSession.
final config = VoiceSessionConfig(
  // Seconds of silence before processing
  silenceDuration: 1.5,
  // Audio level threshold for speech
  speechThreshold: 0.03,
  // Automatically play TTS response
  autoPlayTTS: true,
  // Resume listening after TTS completes
  continuousMode: true,
);
final session = await RunAnywhere.startVoiceSession(config: config);
| Parameter | Type | Default | Description |
|---|---|---|---|
| `silenceDuration` | `double` | `1.5` | Seconds of silence to trigger processing |
| `speechThreshold` | `double` | `0.03` | Audio level threshold for speech detection |
| `autoPlayTTS` | `bool` | `true` | Auto-play synthesized audio |
| `continuousMode` | `bool` | `true` | Keep listening after response |
VoiceSessionHandle
Control the voice session:
// Obtain the handle that controls the voice session.
final session = await RunAnywhere.startVoiceSession();
// Inspect the current state through the handle's isRunning / isProcessing getters.
print('Running: ${session.isRunning}');
print('Processing: ${session.isProcessing}');
// Start listening. This is called automatically, so an explicit call is only
// needed when you drive the session manually.
await session.start();
// Force processing of the audio captured so far (push-to-talk mode).
await session.sendNow();
// Stop the session.
session.stop();
// Clean up the handle once it is no longer needed.
await session.dispose();
Voice Session Events
| Event | Description | Properties |
|---|---|---|
| `VoiceSessionStarted` | Session started | - |
| `VoiceSessionListening` | Actively listening | `audioLevel` |
| `VoiceSessionSpeechStarted` | Speech detected | - |
| `VoiceSessionProcessing` | Processing audio | - |
| `VoiceSessionTranscribed` | Got transcription | `text` |
| `VoiceSessionResponded` | Got LLM response | `text` |
| `VoiceSessionSpeaking` | Playing TTS | - |
| `VoiceSessionTurnCompleted` | Turn finished | `transcript`, `response` |
| `VoiceSessionStopped` | Session stopped | - |
| `VoiceSessionError` | Error occurred | `message` |
Complete Example
/// Full-screen voice assistant demo: a chat transcript plus a start/stop
/// button that drives a voice session.
class VoiceAssistantScreen extends StatefulWidget {
  /// Creates the screen. [key] is forwarded to [StatefulWidget].
  const VoiceAssistantScreen({super.key});

  // The return type is the public State<T> so the private state class does
  // not appear in this widget's public signature.
  @override
  State<VoiceAssistantScreen> createState() => _VoiceAssistantScreenState();
}
/// State for [VoiceAssistantScreen].
///
/// Owns the voice session and mirrors its events into a chat transcript, a
/// status line and an audio level meter.
class _VoiceAssistantScreenState extends State<VoiceAssistantScreen> {
  /// The session created by the last successful start, or `null` when idle.
  VoiceSessionHandle? _session;

  /// Transcribed user turns and AI responses, oldest first.
  final List<ChatMessage> _messages = [];

  /// Most recent audio level reported by a [VoiceSessionListening] event.
  double _audioLevel = 0.0;

  /// Human-readable status shown above the button.
  String _status = 'Tap to start';

  /// Whether the session is currently considered running by the UI.
  bool _isActive = false;

  /// Whether a toggle is still in flight (loading models, starting or
  /// disposing a session). Taps received meanwhile are ignored so that two
  /// sessions can never be started concurrently.
  bool _isBusy = false;

  /// Downloads (if needed) and loads the STT model, the LLM and the TTS voice.
  ///
  /// Loading continues even if the screen is closed meanwhile; only the UI
  /// updates are skipped once the state is unmounted.
  Future<void> _initializeModels() async {
    if (mounted) setState(() => _status = 'Loading models...');

    if (!RunAnywhere.isSTTModelLoaded) {
      await _downloadModel('sherpa-onnx-whisper-tiny.en', 'STT');
      await RunAnywhere.loadSTTModel('sherpa-onnx-whisper-tiny.en');
    }
    if (!RunAnywhere.isModelLoaded) {
      await _downloadModel('smollm2-360m-q8_0', 'LLM');
      await RunAnywhere.loadModel('smollm2-360m-q8_0');
    }
    if (!RunAnywhere.isTTSVoiceLoaded) {
      await _downloadModel('vits-piper-en_US-lessac-medium', 'TTS');
      await RunAnywhere.loadTTSVoice('vits-piper-en_US-lessac-medium');
    }

    if (mounted) setState(() => _status = 'Ready');
  }

  /// Downloads [modelId], reporting progress in the status line under [label].
  ///
  /// Returns once the download stream reports completion or ends.
  Future<void> _downloadModel(String modelId, String label) async {
    await for (final p in RunAnywhere.downloadModel(modelId)) {
      if (mounted) {
        final percent = (p.percentage * 100).toStringAsFixed(0);
        setState(() => _status = 'Downloading $label: $percent%');
      }
      if (p.state.isCompleted) break;
    }
  }

  /// Stops the running session, or starts a new one when idle.
  ///
  /// If the previous session already stopped on its own (the UI is inactive
  /// but a handle is still held), the stale handle is cleaned up and a fresh
  /// session is started by the same tap.
  Future<void> _toggleSession() async {
    if (_isBusy) return;
    _isBusy = true;
    try {
      final previous = _session;
      if (previous != null) {
        final wasActive = _isActive;
        previous.stop();
        setState(() {
          _session = null;
          _isActive = false;
          _status = 'Stopped';
        });
        // Stop first, then dispose, to release the handle being dropped.
        await previous.dispose();
        if (wasActive || !mounted) return;
      }

      // Ensure models are loaded
      if (!RunAnywhere.isVoiceAgentReady) {
        await _initializeModels();
        if (!mounted) return;
      }

      // Start session
      final session = await RunAnywhere.startVoiceSession(
        config: VoiceSessionConfig(
          silenceDuration: 1.5,
          autoPlayTTS: true,
          continuousMode: true,
        ),
      );
      if (!mounted) {
        // The screen was closed while the session was starting; do not leave
        // it running without an owner.
        session.stop();
        await session.dispose();
        return;
      }
      setState(() {
        _session = session;
        _isActive = true;
      });

      // Handle events
      session.events.listen((event) {
        // Ignore events delivered after this state was disposed, or coming
        // from a session the user has already stopped or replaced; calling
        // setState in either case would be wrong.
        if (!mounted || !identical(_session, session)) return;
        setState(() {
          switch (event) {
            case VoiceSessionListening(:final audioLevel):
              _audioLevel = audioLevel;
              _status = 'Listening...';
            case VoiceSessionSpeechStarted():
              _status = 'Speech detected';
            case VoiceSessionProcessing():
              _status = 'Processing...';
            case VoiceSessionTranscribed(:final text):
              _messages.add(ChatMessage(text: text, isUser: true));
            case VoiceSessionResponded(:final text):
              _messages.add(ChatMessage(text: text, isUser: false));
            case VoiceSessionSpeaking():
              _status = 'Speaking...';
            case VoiceSessionTurnCompleted():
              _status = 'Listening...';
            case VoiceSessionError(:final message):
              _status = 'Error: $message';
            case VoiceSessionStopped():
              _status = 'Stopped';
              _isActive = false;
            default:
              break;
          }
        });
      });
    } finally {
      _isBusy = false;
    }
  }

  @override
  Widget build(BuildContext context) {
    return Scaffold(
      appBar: AppBar(title: const Text('Voice Assistant')),
      body: Column(
        children: [
          // Chat messages
          Expanded(
            child: ListView.builder(
              itemCount: _messages.length,
              itemBuilder: (context, index) {
                final msg = _messages[index];
                return Align(
                  alignment: msg.isUser
                      ? Alignment.centerRight
                      : Alignment.centerLeft,
                  child: Container(
                    margin: const EdgeInsets.all(8),
                    padding: const EdgeInsets.all(12),
                    decoration: BoxDecoration(
                      color: msg.isUser ? Colors.blue : Colors.grey[300],
                      borderRadius: BorderRadius.circular(12),
                    ),
                    child: Text(
                      msg.text,
                      style: TextStyle(
                        color: msg.isUser ? Colors.white : Colors.black,
                      ),
                    ),
                  ),
                );
              },
            ),
          ),
          // Status and controls
          Padding(
            padding: const EdgeInsets.all(16),
            child: Column(
              children: [
                // Audio level indicator
                if (_isActive)
                  SizedBox(
                    height: 4,
                    child: LinearProgressIndicator(
                      value: _audioLevel.clamp(0.0, 1.0),
                    ),
                  ),
                const SizedBox(height: 8),
                Text(_status),
                const SizedBox(height: 16),
                // Start/Stop button
                FloatingActionButton(
                  onPressed: _toggleSession,
                  backgroundColor: _isActive ? Colors.red : Colors.blue,
                  child: Icon(_isActive ? Icons.stop : Icons.mic),
                ),
              ],
            ),
          ),
        ],
      ),
    );
  }

  @override
  void dispose() {
    final session = _session;
    _session = null;
    if (session != null) {
      session.stop();
      // State.dispose is synchronous, so the asynchronous cleanup cannot be
      // awaited here; it is started and left to complete on its own.
      session.dispose();
    }
    super.dispose();
  }
}
/// A single entry in the chat transcript.
class ChatMessage {
  /// The message content as displayed in the chat bubble.
  final String text;

  /// Whether the message came from the user (`true`) or the AI (`false`).
  final bool isUser;

  /// Creates an immutable message; usable in `const` contexts.
  const ChatMessage({required this.text, required this.isUser});
}
Push-to-Talk Mode
For push-to-talk instead of continuous listening:
final session = await RunAnywhere.startVoiceSession(
  config: VoiceSessionConfig(
    continuousMode: false, // Don't auto-resume
  ),
);

// Manual control

/// Starts listening when the talk button is pressed.
///
/// Returns a [Future] (rather than `void`) so callers can await it and see
/// failures; it is still assignable wherever a void callback is expected.
Future<void> onButtonDown() async {
  await session.start();
}

/// Forces processing of the captured audio when the talk button is released.
Future<void> onButtonUp() async {
  await session.sendNow(); // Force process current audio
}
Component States
Check individual component states:
// Snapshot of the per-component state of the voice agent.
final states = RunAnywhere.getVoiceAgentComponentStates();
// Each component reports one of: loaded, loading, unloaded, error.
print('STT: ${states.stt}'); // speech-to-text
print('LLM: ${states.llm}'); // language model
print('TTS: ${states.tts}'); // text-to-speech
// Single flag summarising the components above.
print('All ready: ${states.isFullyReady}');
Cleanup
// Stop the running session.
session.stop();
// Release the voice agent's resources.
RunAnywhere.cleanupVoiceAgent();
// Optionally unload the STT model, the LLM and the TTS voice to free memory.
await RunAnywhere.unloadSTTModel();
await RunAnywhere.unloadModel();
await RunAnywhere.unloadTTSVoice();
See Also
VAD
Voice activity detection