Streaming TTS generates audio in chunks, enabling playback to begin before the entire text is synthesized. This is ideal for long content where you want immediate audio feedback.
Documentation Index
Fetch the complete documentation index at: https://docs.runanywhere.ai/llms.txt
Use this file to discover all available pages before exploring further.
Basic Usage
// Input text and synthesis options, named up front so the call itself stays short.
let longText = "This is a long piece of text that will be synthesized in chunks..."
let streamOptions = TTSOptions(rate: 1.0)

// Each chunk is handed to the callback as soon as it is produced.
try await RunAnywhere.synthesizeStream(
    longText,
    options: streamOptions,
    onAudioChunk: { audioChunk in
        // Play or buffer each audio chunk
        audioPlayer.enqueue(audioChunk)
    }
)
Method Signature
/// Synthesizes `text` to speech, delivering audio through `onAudioChunk` in chunks.
///
/// - Parameters:
///   - text: Text to synthesize.
///   - options: Synthesis options. Defaults to `TTSOptions()`.
///   - onAudioChunk: Callback for each audio chunk.
/// - Returns: A `TTSOutput` with the complete synthesized audio after streaming completes.
/// - Throws: The function is declared `throws`; the error-handling example on this page
///   catches `SDKError`.
public static func synthesizeStream(
_ text: String,
options: TTSOptions = TTSOptions(),
onAudioChunk: @escaping (Data) -> Void
) async throws -> TTSOutput
Parameters
| Parameter | Type | Description |
|---|---|---|
| `text` | `String` | Text to synthesize |
| `options` | `TTSOptions` | Synthesis options |
| `onAudioChunk` | `(Data) -> Void` | Callback for each audio chunk |
Returns
TTSOutput with the complete synthesized audio after streaming completes.
Streaming Audio Player
Build a player that handles streaming audio chunks:
import AVFoundation
/// Plays raw PCM audio chunks as they arrive by scheduling them on an `AVAudioPlayerNode`.
///
/// Each chunk is interpreted as mono, non-interleaved 32-bit float samples at the
/// sample rate given to `prepare(sampleRate:)`.
class StreamingAudioPlayer {
    private var audioEngine: AVAudioEngine?
    private var playerNode: AVAudioPlayerNode?
    private var audioFormat: AVAudioFormat?

    /// Builds the engine graph, starts the engine, and puts the player node into the playing state.
    ///
    /// Calling this again tears down the previous graph first, so a repeated call
    /// does not leave an old engine running.
    ///
    /// - Parameter sampleRate: Sample rate of the incoming chunks, in Hz.
    /// - Throws: Whatever `AVAudioEngine.start()` throws. On failure no state is kept,
    ///   so later `enqueue(_:)` calls are no-ops.
    func prepare(sampleRate: Double = 22050) throws {
        stop()

        let engine = AVAudioEngine()
        let player = AVAudioPlayerNode()
        let format = AVAudioFormat(
            commonFormat: .pcmFormatFloat32,
            sampleRate: sampleRate,
            channels: 1,
            interleaved: false
        )

        engine.attach(player)
        engine.connect(player, to: engine.mainMixerNode, format: format)
        try engine.start()
        player.play()

        // Publish the graph only once it is actually running.
        audioEngine = engine
        playerNode = player
        audioFormat = format
    }

    /// Schedules one chunk of float32 samples for playback.
    ///
    /// Does nothing if the player has not been prepared or the chunk holds less than one sample.
    /// Trailing bytes that do not make up a whole sample are ignored.
    ///
    /// - Parameter audioData: Raw bytes of 32-bit float PCM samples.
    func enqueue(_ audioData: Data) {
        guard let format = audioFormat,
              let player = playerNode else { return }

        let bytesPerSample = MemoryLayout<Float>.size
        let sampleCount = audioData.count / bytesPerSample

        guard sampleCount > 0,
              let frameCount = AVAudioFrameCount(exactly: sampleCount),
              let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount),
              let channel = buffer.floatChannelData?[0] else { return }

        buffer.frameLength = frameCount

        // Byte-wise copy: `Data` storage is not guaranteed to be aligned for `Float`,
        // so the bytes are never reinterpreted through a typed pointer.
        audioData.withUnsafeBytes { rawBytes in
            guard let source = rawBytes.baseAddress else { return }
            UnsafeMutableRawPointer(channel)
                .copyMemory(from: source, byteCount: sampleCount * bytesPerSample)
        }

        player.scheduleBuffer(buffer)
    }

    /// Stops playback and releases the engine graph. Call `prepare(sampleRate:)` to play again.
    func stop() {
        playerNode?.stop()
        audioEngine?.stop()
        playerNode = nil
        audioEngine = nil
        audioFormat = nil
    }
}
SwiftUI Integration
/// Demo screen: a text editor and a button that speaks the text via streaming synthesis,
/// with a linear progress bar driven by an estimated chunk count.
struct StreamingTTSView: View {
    @State private var text = "Hello! This is a demonstration of streaming text-to-speech synthesis. The audio will begin playing before the entire text is processed, providing a more responsive experience for longer content."
    @State private var isSynthesizing = false
    @State private var progress: Double = 0
    @StateObject private var player = StreamingAudioPlayerWrapper()

    var body: some View {
        VStack(spacing: 20) {
            // Text input
            TextEditor(text: $text)
                .frame(height: 150)
                .border(Color.gray.opacity(0.3))

            // Progress indicator
            if isSynthesizing {
                ProgressView(value: progress)
                    .progressViewStyle(.linear)
            }

            // Control button
            Button(action: synthesize) {
                Label(
                    isSynthesizing ? "Synthesizing..." : "Speak",
                    systemImage: isSynthesizing ? "waveform" : "play.fill"
                )
                .frame(maxWidth: .infinity)
                .padding()
                .background(isSynthesizing ? Color.gray : Color.blue)
                .foregroundColor(.white)
                .cornerRadius(10)
            }
            .disabled(text.isEmpty || isSynthesizing)
        }
        .padding()
    }

    /// Starts streaming synthesis of `text`, feeding each chunk to the player and updating `progress`.
    ///
    /// On failure the error is printed and the player is stopped, matching the
    /// error-handling example elsewhere on this page.
    func synthesize() {
        isSynthesizing = true
        progress = 0

        Task {
            do {
                try player.prepare()

                // Estimate total chunks based on text length
                let totalChunks = max(1, text.count / 100)
                var processedChunks = 0

                _ = try await RunAnywhere.synthesizeStream(
                    text,
                    options: TTSOptions(rate: 1.0),
                    onAudioChunk: { chunk in
                        player.enqueue(chunk)
                        processedChunks += 1

                        // Compute the fraction now: the main-actor hop below runs later,
                        // and must not read the counter while further chunks mutate it.
                        let fraction = min(1.0, Double(processedChunks) / Double(totalChunks))
                        Task { @MainActor in
                            progress = fraction
                        }
                    }
                )

                await MainActor.run {
                    isSynthesizing = false
                    progress = 1.0
                }
            } catch {
                print("Synthesis failed: \(error)")
                await MainActor.run {
                    isSynthesizing = false
                    // Do not leave the audio engine running after a failed synthesis.
                    player.stop()
                }
            }
        }
    }
}
/// Main-actor `ObservableObject` that forwards every call to a private `StreamingAudioPlayer`.
@MainActor
class StreamingAudioPlayerWrapper: ObservableObject {
    private let wrappedPlayer = StreamingAudioPlayer()

    /// Forwards to `StreamingAudioPlayer.prepare()`, rethrowing any error.
    func prepare() throws { try wrappedPlayer.prepare() }

    /// Forwards one audio chunk to `StreamingAudioPlayer.enqueue(_:)`.
    func enqueue(_ data: Data) { wrappedPlayer.enqueue(data) }

    /// Forwards to `StreamingAudioPlayer.stop()`.
    func stop() { wrappedPlayer.stop() }
}
Sentence-by-Sentence Streaming
For natural pauses, split text into sentences:
/// Speaks `text` one sentence at a time, waiting for each sentence to finish before the next.
///
/// Sentences are found with Foundation's sentence enumeration, so each keeps its own
/// terminal punctuation ("?" and "!" are not rewritten to "."), and decimals or
/// abbreviations are not treated as sentence ends.
///
/// - Parameter text: Text to speak.
/// - Throws: Errors from `RunAnywhere.synthesize`, `AVAudioPlayer(data:)`, or `Task.sleep`
///   (the latter on cancellation).
func speakSentenceBySentence(_ text: String) async throws {
    /// Splits `source` at sentence boundaries, trimming whitespace and dropping empty pieces.
    func sentences(in source: String) -> [String] {
        var found: [String] = []
        source.enumerateSubstrings(
            in: source.startIndex..<source.endIndex,
            options: .bySentences
        ) { piece, _, _, _ in
            guard let sentence = piece?.trimmingCharacters(in: .whitespacesAndNewlines),
                  !sentence.isEmpty else { return }
            found.append(sentence)
        }
        return found
    }

    for var sentence in sentences(in: text) {
        // A fragment with no closing punctuation still gets a period, as before.
        if let lastScalar = sentence.unicodeScalars.last,
           !CharacterSet.punctuationCharacters.contains(lastScalar) {
            sentence += "."
        }

        let output = try await RunAnywhere.synthesize(
            sentence,
            options: TTSOptions(rate: 1.0)
        )

        // Play and wait for completion
        let player = try AVAudioPlayer(data: output.audioData)
        // If playback did not start there is nothing to wait for.
        guard player.play() else { continue }

        // Wait for playback to complete
        try await Task.sleep(for: .seconds(output.duration))

        // Keep the player alive across the sleep: without a use after it, ARC may
        // release the player right after `play()`, cutting the audio off.
        withExtendedLifetime(player) {}
    }
}
Buffered Streaming
Buffer chunks for smoother playback:
/// Queues audio chunks and plays them in order once a minimum number has been buffered.
///
/// Call `addChunk(_:)` for every chunk and `finish()` when the stream ends. Streams shorter
/// than the minimum buffer size are played when `finish()` is called.
class BufferedStreamingPlayer {
    /// What the drain loop should do next.
    private enum DrainStep {
        case play(Data)
        case wait
        case done
    }

    // Guards all mutable state: `addChunk`/`finish` run on the caller's thread,
    // while the drain loop runs in its own task.
    private let lock = NSLock()
    private var buffer: [Data] = []
    private var isPlaying = false       // a drain task is currently running
    private var inputFinished = false   // `finish()` was called; no more chunks expected
    private let minimumBufferSize = 3 // Start playing after 3 chunks

    /// Appends a chunk and starts playback once enough chunks are buffered.
    ///
    /// A chunk arriving after `finish()` begins a new stream.
    func addChunk(_ chunk: Data) {
        let shouldStart = lock.withLock { () -> Bool in
            buffer.append(chunk)
            inputFinished = false
            guard !isPlaying, buffer.count >= minimumBufferSize else { return false }
            isPlaying = true
            return true
        }
        if shouldStart {
            startPlayback()
        }
    }

    /// Runs the drain loop in a task. The caller must already have set `isPlaying`,
    /// which guarantees at most one drain task at a time.
    private func startPlayback() {
        Task {
            while true {
                switch nextStep() {
                case .play(let chunk):
                    await playChunk(chunk)
                case .wait:
                    // Buffer ran dry but more input is expected; poll again shortly.
                    try? await Task.sleep(for: .milliseconds(50))
                case .done:
                    return
                }
            }
        }
    }

    /// Takes the next chunk, or decides whether to wait for more input or stop draining.
    private func nextStep() -> DrainStep {
        lock.withLock { () -> DrainStep in
            if !buffer.isEmpty {
                return .play(buffer.removeFirst())
            }
            if inputFinished {
                isPlaying = false
                return .done
            }
            return .wait
        }
    }

    private func playChunk(_ data: Data) async {
        // Play audio chunk
    }

    /// Marks the end of the stream. Remaining chunks are still played; if playback never
    /// started because fewer than `minimumBufferSize` chunks arrived, it starts now.
    func finish() {
        let shouldStart = lock.withLock { () -> Bool in
            inputFinished = true
            guard !isPlaying, !buffer.isEmpty else { return false }
            isPlaying = true
            return true
        }
        if shouldStart {
            startPlayback()
        }
    }
}
Use Cases
Audiobook reader
Audiobook reader
Stream long-form content with immediate playback.
/// Streams `text` through TTS at a rate of 0.9, enqueueing each chunk on `audioPlayer`.
///
/// - Parameter text: Chapter text to read.
/// - Throws: Errors from `RunAnywhere.synthesizeStream`.
func readChapter(_ text: String) async throws {
    let chapterOptions = TTSOptions(rate: 0.9)
    try await RunAnywhere.synthesizeStream(text, options: chapterOptions) { chunk in
        audioPlayer.enqueue(chunk)
    }
}
AI assistant voice
AI assistant voice
Stream LLM responses as they’re generated.
let llmResult = try await RunAnywhere.generateStream(prompt)
var accumulatedText = ""

for try await token in llmResult.stream {
    accumulatedText += token

    // Synthesize complete sentences
    guard token.contains(".") || token.contains("!") || token.contains("?") else { continue }

    try await RunAnywhere.synthesizeStream(accumulatedText) { chunk in
        player.enqueue(chunk)
    }
    accumulatedText = ""
}

// Speak whatever is left: a response that ends without terminal punctuation
// would otherwise lose its last sentence.
let remainingText = accumulatedText.trimmingCharacters(in: .whitespacesAndNewlines)
if !remainingText.isEmpty {
    try await RunAnywhere.synthesizeStream(remainingText) { chunk in
        player.enqueue(chunk)
    }
    accumulatedText = ""
}
Accessibility features
Accessibility features
Read screen content aloud as user navigates.
import SwiftUI
/// Example view with a button that reads the screen content aloud, or stops reading if
/// speech is already in progress.
struct AccessibleContentView: View {
    @State private var isSpeaking = false

    var body: some View {
        VStack {
            Text("Welcome to the app")
                .accessibilityLabel("Welcome to the app")

            Button(isSpeaking ? "Stop Reading" : "Read Aloud") {
                Task { await toggleReading() }
            }
            .accessibilityHint("Reads the current screen content aloud")
        }
    }

    /// Stops speech if it is running; otherwise reads the screen content and waits for it to finish.
    ///
    /// Synthesis errors are logged rather than silently discarded.
    @MainActor
    private func toggleReading() async {
        guard !isSpeaking else {
            await RunAnywhere.stopSpeaking()
            return
        }

        isSpeaking = true
        // Reset on every exit path, success or failure.
        defer { isSpeaking = false }

        do {
            _ = try await RunAnywhere.synthesizeStream(
                "Welcome to the app. Navigate using swipe gestures.",
                options: TTSOptions(rate: 0.9),
                onAudioChunk: { chunk in
                    AudioPlayer.shared.enqueue(chunk)
                }
            )
        } catch {
            print("Read aloud failed: \(error)")
        }
    }
}
Error Handling
do {
    try await RunAnywhere.synthesizeStream(
        text,
        options: TTSOptions(),
        onAudioChunk: { chunk in
            player.enqueue(chunk)
        }
    )
} catch let error as SDKError {
    switch error.code {
    case .modelNotFound:
        print("Load a TTS voice first")
    case .processingFailed:
        print("Streaming synthesis failed")
    default:
        print("Error: \(error)")
    }
    // Stop player on error
    player.stop()
} catch {
    // Errors that are not `SDKError` must also stop the player;
    // otherwise it is left running with a half-delivered stream.
    print("Error: \(error)")
    player.stop()
}
Performance Tips
Buffer before playing: Wait for a few chunks before starting playback to avoid stuttering.
Match sample rates: Ensure your audio player’s sample rate matches the TTS output (typically
22050 Hz).
synthesize()
Non-streaming synthesis →
Voices
Available voices →