Basic Usage
try await RunAnywhere.synthesizeStream(
    "This is a long piece of text that will be synthesized in chunks...",
    options: TTSOptions(rate: 1.0),
    onAudioChunk: { chunk in
        // Play or buffer each audio chunk
        audioPlayer.enqueue(chunk)
    }
)
Method Signature
public static func synthesizeStream(
    _ text: String,
    options: TTSOptions = TTSOptions(),
    onAudioChunk: @escaping (Data) -> Void
) async throws -> TTSOutput
Parameters
| Parameter | Type | Description |
|---|---|---|
| text | String | Text to synthesize |
| options | TTSOptions | Synthesis options (defaults to TTSOptions()) |
| onAudioChunk | (Data) -> Void | Callback invoked with each audio chunk |
Returns
TTSOutput with the complete synthesized audio after streaming completes.
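Because the method also returns the complete audio, you can stream for immediate playback and still keep the full result, for example to cache it for replay. A sketch (cachedAudioURL is illustrative; audioData and duration are used the same way in the examples below):
let output = try await RunAnywhere.synthesizeStream(
    text,
    options: TTSOptions(rate: 1.0),
    onAudioChunk: { audioPlayer.enqueue($0) }
)

// Persist the complete audio for later replay (URL is illustrative)
try output.audioData.write(to: cachedAudioURL)
print("Synthesized \(output.duration) seconds of audio")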
Streaming Audio Player
Build a player that handles streaming audio chunks:
import AVFoundation

class StreamingAudioPlayer {
    private var audioEngine: AVAudioEngine?
    private var playerNode: AVAudioPlayerNode?
    private var audioFormat: AVAudioFormat?

    func prepare(sampleRate: Double = 22050) throws {
        audioEngine = AVAudioEngine()
        playerNode = AVAudioPlayerNode()
        guard let engine = audioEngine, let player = playerNode else { return }

        // Mono 32-bit float PCM; the sample rate should match the TTS output
        audioFormat = AVAudioFormat(
            commonFormat: .pcmFormatFloat32,
            sampleRate: sampleRate,
            channels: 1,
            interleaved: false
        )

        engine.attach(player)
        engine.connect(player, to: engine.mainMixerNode, format: audioFormat)
        try engine.start()
        player.play()
    }

    func enqueue(_ audioData: Data) {
        guard let format = audioFormat,
              let player = playerNode else { return }

        // Convert the raw Float32 bytes into an AVAudioPCMBuffer
        let frameCount = UInt32(audioData.count) / UInt32(MemoryLayout<Float>.size)
        guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else { return }
        buffer.frameLength = frameCount

        audioData.withUnsafeBytes { ptr in
            if let address = ptr.baseAddress?.assumingMemoryBound(to: Float.self) {
                buffer.floatChannelData?[0].update(from: address, count: Int(frameCount))
            }
        }

        player.scheduleBuffer(buffer)
    }

    func stop() {
        playerNode?.stop()
        audioEngine?.stop()
    }
}
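A minimal sketch of wiring this player into synthesizeStream, using the default 22050 Hz rate:
let streamingPlayer = StreamingAudioPlayer()
try streamingPlayer.prepare() // 22050 Hz by default

try await RunAnywhere.synthesizeStream(
    "Playback starts as soon as the first chunk arrives.",
    options: TTSOptions(rate: 1.0),
    onAudioChunk: { streamingPlayer.enqueue($0) }
)
streamingPlayer.stop()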
SwiftUI Integration
import SwiftUI

struct StreamingTTSView: View {
    @State private var text = "Hello! This is a demonstration of streaming text-to-speech synthesis. The audio will begin playing before the entire text is processed, providing a more responsive experience for longer content."
    @State private var isSynthesizing = false
    @State private var progress: Double = 0
    @StateObject private var player = StreamingAudioPlayerWrapper()

    var body: some View {
        VStack(spacing: 20) {
            // Text input
            TextEditor(text: $text)
                .frame(height: 150)
                .border(Color.gray.opacity(0.3))

            // Progress indicator
            if isSynthesizing {
                ProgressView(value: progress)
                    .progressViewStyle(.linear)
            }

            // Control button
            Button(action: synthesize) {
                Label(
                    isSynthesizing ? "Synthesizing..." : "Speak",
                    systemImage: isSynthesizing ? "waveform" : "play.fill"
                )
                .frame(maxWidth: .infinity)
                .padding()
                .background(isSynthesizing ? Color.gray : Color.blue)
                .foregroundColor(.white)
                .cornerRadius(10)
            }
            .disabled(text.isEmpty || isSynthesizing)
        }
        .padding()
    }

    func synthesize() {
        isSynthesizing = true
        progress = 0

        Task {
            do {
                try player.prepare()

                // Estimate total chunks based on text length
                let totalChunks = max(1, text.count / 100)
                var processedChunks = 0

                let _ = try await RunAnywhere.synthesizeStream(
                    text,
                    options: TTSOptions(rate: 1.0),
                    onAudioChunk: { chunk in
                        player.enqueue(chunk)
                        processedChunks += 1
                        Task { @MainActor in
                            progress = min(1.0, Double(processedChunks) / Double(totalChunks))
                        }
                    }
                )

                await MainActor.run {
                    isSynthesizing = false
                    progress = 1.0
                }
            } catch {
                print("Synthesis failed: \(error)")
                await MainActor.run {
                    isSynthesizing = false
                }
            }
        }
    }
}

@MainActor
class StreamingAudioPlayerWrapper: ObservableObject {
    private let player = StreamingAudioPlayer()

    func prepare() throws {
        try player.prepare()
    }

    func enqueue(_ data: Data) {
        player.enqueue(data)
    }

    func stop() {
        player.stop()
    }
}
Sentence-by-Sentence Streaming
For natural pauses, split text into sentences:
func speakSentenceBySentence(_ text: String) async throws {
    let sentences = text.components(separatedBy: CharacterSet(charactersIn: ".!?"))
        .map { $0.trimmingCharacters(in: .whitespaces) }
        .filter { !$0.isEmpty }

    for sentence in sentences {
        let output = try await RunAnywhere.synthesize(
            sentence + ".",
            options: TTSOptions(rate: 1.0)
        )

        // Play and wait for completion
        let player = try AVAudioPlayer(data: output.audioData)
        player.play()

        // Wait for playback to complete before starting the next sentence
        try await Task.sleep(for: .seconds(output.duration))
    }
}
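Note that splitting on a character set drops the original punctuation, which is why each sentence is re-terminated with a period above. If you want to preserve question marks and exclamation points, Foundation's sentence enumeration is one alternative; a sketch:
import Foundation

func splitIntoSentences(_ text: String) -> [String] {
    var sentences: [String] = []
    // .bySentences keeps each sentence's original punctuation
    text.enumerateSubstrings(in: text.startIndex..<text.endIndex, options: .bySentences) { substring, _, _, _ in
        if let sentence = substring?.trimmingCharacters(in: .whitespacesAndNewlines),
           !sentence.isEmpty {
            sentences.append(sentence)
        }
    }
    return sentences
}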
Buffered Streaming
Buffer chunks for smoother playback:
class BufferedStreamingPlayer {
    private var buffer: [Data] = []
    private var isPlaying = false
    private let minimumBufferSize = 3 // Start playing after 3 chunks

    func addChunk(_ chunk: Data) {
        buffer.append(chunk)
        if buffer.count >= minimumBufferSize && !isPlaying {
            startPlayback()
        }
    }

    private func startPlayback() {
        isPlaying = true
        Task {
            // Drain the buffer; keep polling while more chunks may arrive
            while !buffer.isEmpty || isPlaying {
                if let chunk = buffer.first {
                    buffer.removeFirst()
                    await playChunk(chunk)
                } else {
                    try? await Task.sleep(for: .milliseconds(50))
                }
            }
        }
    }

    private func playChunk(_ data: Data) async {
        // Play audio chunk
    }

    func finish() {
        isPlaying = false
    }
}
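One way to wire this into synthesizeStream (a sketch; playChunk still needs a real audio backend such as the StreamingAudioPlayer above):
let buffered = BufferedStreamingPlayer()

try await RunAnywhere.synthesizeStream(
    text,
    options: TTSOptions(),
    onAudioChunk: { buffered.addChunk($0) }
)

// No more chunks are coming; the drain loop plays out whatever remains
buffered.finish()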
Use Cases
Audiobook reader
Stream long-form content with immediate playback.
func readChapter(_ text: String) async throws {
    try await RunAnywhere.synthesizeStream(
        text,
        options: TTSOptions(rate: 0.9),
        onAudioChunk: { audioPlayer.enqueue($0) }
    )
}
AI assistant voice
Stream LLM responses as they’re generated.
let llmResult = try await RunAnywhere.generateStream(prompt)
var accumulatedText = ""

for try await token in llmResult.stream {
    accumulatedText += token

    // Synthesize complete sentences
    if token.contains(".") || token.contains("!") || token.contains("?") {
        try await RunAnywhere.synthesizeStream(accumulatedText) { chunk in
            player.enqueue(chunk)
        }
        accumulatedText = ""
    }
}

// Speak any trailing text that never hit sentence-ending punctuation
if !accumulatedText.isEmpty {
    try await RunAnywhere.synthesizeStream(accumulatedText) { chunk in
        player.enqueue(chunk)
    }
}
Accessibility features
Read screen content aloud as the user navigates.
import SwiftUI

struct AccessibleContentView: View {
    @State private var isSpeaking = false

    var body: some View {
        VStack {
            Text("Welcome to the app")
                .accessibilityLabel("Welcome to the app")

            Button(isSpeaking ? "Stop Reading" : "Read Aloud") {
                Task {
                    if isSpeaking {
                        await RunAnywhere.stopSpeaking()
                        isSpeaking = false
                    } else {
                        isSpeaking = true
                        try? await RunAnywhere.synthesizeStream(
                            "Welcome to the app. Navigate using swipe gestures.",
                            options: TTSOptions(rate: 0.9),
                            onAudioChunk: { chunk in
                                AudioPlayer.shared.enqueue(chunk)
                            }
                        )
                        isSpeaking = false
                    }
                }
            }
            .accessibilityHint("Reads the current screen content aloud")
        }
    }
}
Error Handling
do {
    try await RunAnywhere.synthesizeStream(
        text,
        options: TTSOptions(),
        onAudioChunk: { chunk in
            player.enqueue(chunk)
        }
    )
} catch let error as SDKError {
    switch error.code {
    case .modelNotFound:
        print("Load a TTS voice first")
    case .processingFailed:
        print("Streaming synthesis failed")
    default:
        print("Error: \(error)")
    }

    // Stop player on error
    player.stop()
}
Performance Tips
Buffer before playing: Wait for a few chunks before starting playback to avoid stuttering.
Match sample rates: Ensure your audio player's sample rate matches the TTS output (typically 22050 Hz).
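For instance, a voice that outputs at a different rate needs the player prepared to match; this sketch assumes a hypothetical 24000 Hz voice (check your model's actual output rate):
let player = StreamingAudioPlayer()
// 24000 is an assumed voice output rate; the default shown above is 22050
try player.prepare(sampleRate: 24000)

try await RunAnywhere.synthesizeStream(
    text,
    options: TTSOptions(),
    onAudioChunk: { player.enqueue($0) }
)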