The `synthesize()` method converts text to natural-sounding speech using on-device neural voice models.
Basic Usage
Copy
Ask AI
// Turn the text into audio data.
let speech = try await RunAnywhere.synthesize("Hello! Welcome to RunAnywhere.")

// Hand the synthesized bytes to AVAudioPlayer and start playback.
let audioPlayer = try AVAudioPlayer(data: speech.audioData)
audioPlayer.play()
Setup
Before synthesizing, register the ONNX module and load a TTS voice:
Ask AI
import RunAnywhere
import ONNXRuntime
/// Registers the ONNX module.
///
/// Call this at app launch, before loading a TTS voice.
@MainActor
func setup() {
    ONNX.register()
}
// Load the TTS voice; do this before calling synthesize.
try await RunAnywhere.loadTTSVoice("piper-en-us-amy")
Method Signatures
Basic Synthesis
Copy
Ask AI
/// Converts `text` to speech and returns the synthesized audio.
///
/// - Parameters:
///   - text: The text to synthesize.
///   - options: Synthesis options; defaults to `TTSOptions()`.
/// - Returns: A `TTSOutput` containing the synthesized audio.
/// - Throws: An error if synthesis fails.
public static func synthesize(
    _ text: String,
    options: TTSOptions = TTSOptions()
) async throws -> TTSOutput
Speak (with automatic playback)
Copy
Ask AI
/// Synthesizes `text` and plays the result automatically.
///
/// - Parameters:
///   - text: The text to speak.
///   - options: Synthesis options; defaults to `TTSOptions()`.
/// - Returns: A `TTSSpeakResult` for the call.
/// - Throws: An error if synthesis or playback fails.
public static func speak(
    _ text: String,
    options: TTSOptions = TTSOptions()
) async throws -> TTSSpeakResult
TTSOutput
Copy
Ask AI
/// The result of a synthesis call.
public struct TTSOutput: Sendable {
    /// The synthesized audio bytes.
    public let audioData: Data
    /// The format of the audio (WAV, PCM, etc.).
    public let format: AudioFormat
    /// The duration of the audio, in seconds.
    public let duration: TimeInterval
    /// Phoneme timing information; may be `nil`.
    public let phonemeTimestamps: [PhonemeTimestamp]?
    /// Processing information for this synthesis; may be `nil`.
    public let metadata: TTSSynthesisMetadata?
}
TTSOptions
Copy
Ask AI
/// Options that control how text is synthesized.
public struct TTSOptions: Sendable {
    /// Speaking speed (0.5-2.0, default: 1.0).
    public let rate: Float
    /// Voice pitch (0.5-2.0, default: 1.0).
    public let pitch: Float
    /// Output volume (0.0-1.0, default: 1.0).
    public let volume: Float
    /// Language code (default: "en-US").
    public let language: String
    /// Output sample rate (default: 22050).
    public let sampleRate: Int
    /// Output audio format (default: `.wav`).
    public let audioFormat: AudioFormat

    /// Creates a set of options. Every parameter has a default, so
    /// `TTSOptions()` is valid and callers can override only what they need.
    public init(
        rate: Float = 1.0,
        pitch: Float = 1.0,
        volume: Float = 1.0,
        language: String = "en-US",
        sampleRate: Int = 22050,
        audioFormat: AudioFormat = .wav
    )
}
Examples
Simple Text-to-Speech
Copy
Ask AI
// Synthesize a short phrase and report how much audio was produced.
let greeting = try await RunAnywhere.synthesize("Hello world!")
print("Generated \(greeting.duration) seconds of audio")
With Custom Options
Copy
Ask AI
// Slightly slower and slightly higher-pitched than the defaults, at 80% volume.
let customOptions = TTSOptions(rate: 0.9, pitch: 1.1, volume: 0.8)

let customizedSpeech = try await RunAnywhere.synthesize("This is a customized voice.", options: customOptions)
Speak (Automatic Playback)
Copy
Ask AI
// Synthesize the text and play it back in a single call.
try await RunAnywhere.speak("Hello! How can I help you today?")

// Query whether playback is still in progress.
let playbackInProgress = await RunAnywhere.isSpeaking

// Interrupt playback.
await RunAnywhere.stopSpeaking()
Playing Audio Manually
Copy
Ask AI
import AVFoundation
/// Synthesizes text with RunAnywhere and plays it through an `AVAudioPlayer`
/// that is retained for the lifetime of this object.
class SpeechPlayer {
    /// The player for the most recent utterance. It is stored in a property
    /// so that `stop()` can reach it later.
    private var activePlayer: AVAudioPlayer?

    /// Synthesizes `text` and starts playing the resulting audio.
    ///
    /// - Parameter text: The text to speak.
    /// - Throws: Any error from synthesis or from creating the audio player.
    func speak(_ text: String) async throws {
        let synthesized = try await RunAnywhere.synthesize(text)
        let player = try AVAudioPlayer(data: synthesized.audioData)
        activePlayer = player
        player.play()
    }

    /// Stops playback; does nothing if no player has been created yet.
    func stop() {
        guard let player = activePlayer else { return }
        player.stop()
    }
}
SwiftUI Voice Output
Copy
Ask AI
/// A simple text-to-speech screen: a text editor, a speed slider, and
/// Speak / Stop buttons backed by `RunAnywhere.speak` and `RunAnywhere.stopSpeaking`.
struct TextToSpeechView: View {
    @State private var text = ""
    @State private var isSpeaking = false
    @State private var rate: Float = 1.0

    var body: some View {
        VStack(spacing: 20) {
            textInput
            rateControl
            controlButtons
        }
        .padding()
    }

    /// Editor for the text to be spoken.
    private var textInput: some View {
        TextEditor(text: $text)
            .frame(height: 150)
            .border(Color.gray.opacity(0.3))
    }

    /// Slider for the speaking rate, labelled with the current value.
    private var rateControl: some View {
        VStack(alignment: .leading) {
            Text("Speed: \(String(format: "%.1f", rate))x")
            Slider(value: $rate, in: 0.5...2.0)
        }
    }

    /// Speak and Stop buttons. Speak is disabled while there is nothing to say
    /// or speech is already in progress; Stop is disabled while idle.
    private var controlButtons: some View {
        HStack(spacing: 20) {
            Button(action: speak) {
                pillLabel("Speak", systemImage: "play.fill", tint: Color.blue)
            }
            .disabled(text.isEmpty || isSpeaking)

            Button(action: stop) {
                pillLabel("Stop", systemImage: "stop.fill", tint: Color.red)
            }
            .disabled(!isSpeaking)
        }
    }

    /// Builds the shared button appearance. The title is a `LocalizedStringKey`
    /// so string literals passed here are treated the same way as when they
    /// are passed to `Label` directly.
    private func pillLabel(_ title: LocalizedStringKey, systemImage: String, tint: Color) -> some View {
        Label(title, systemImage: systemImage)
            .padding()
            .background(tint)
            .foregroundColor(.white)
            .cornerRadius(10)
    }

    /// Starts speaking the current text at the selected rate.
    func speak() {
        isSpeaking = true
        Task {
            do {
                try await RunAnywhere.speak(text, options: TTSOptions(rate: rate))
            } catch {
                print("Speech failed: \(error)")
            }
            // Whether speech finished or failed, the view returns to idle.
            await MainActor.run {
                isSpeaking = false
            }
        }
    }

    /// Stops any speech in progress and returns the view to idle.
    func stop() {
        Task {
            await RunAnywhere.stopSpeaking()
            await MainActor.run { isSpeaking = false }
        }
    }
}
Save to File
Copy
Ask AI
/// Synthesizes `text` and writes the audio to a file in the user's Documents directory.
///
/// - Parameters:
///   - text: The text to synthesize.
///   - filename: The file name without extension; ".wav" is appended.
/// - Returns: The URL of the written file.
/// - Throws: Any error from synthesis, from locating the Documents directory,
///   or from writing the file.
func saveToFile(text: String, filename: String) async throws -> URL {
    let output = try await RunAnywhere.synthesize(text)

    // Use the throwing lookup instead of force-unwrapping `urls(for:in:).first`,
    // so a missing directory surfaces as an error the caller can handle rather
    // than a crash.
    let documentsDirectory = try FileManager.default.url(
        for: .documentDirectory,
        in: .userDomainMask,
        appropriateFor: nil,
        create: true
    )

    // NOTE(review): the ".wav" extension is hard-coded; this call uses the
    // default options — confirm the extension if a different format is requested.
    let fileURL = documentsDirectory.appendingPathComponent("\(filename).wav")
    try output.audioData.write(to: fileURL)
    return fileURL
}
// Usage: synthesize a greeting, save it, and print where it was written.
let savedURL = try await saveToFile(text: "Hello world", filename: "greeting")
print("Saved to: \(savedURL.path)")
Voice Management
Copy
Ask AI
// Load a voice by its identifier.
try await RunAnywhere.loadTTSVoice("piper-en-us-amy")

// Query whether a voice is currently loaded.
let voiceIsLoaded = await RunAnywhere.isTTSVoiceLoaded

// Identifier of the current voice.
let activeVoiceID = await RunAnywhere.currentTTSVoiceId

// Print every available voice.
let availableVoices = await RunAnywhere.availableTTSVoices
availableVoices.forEach { print($0) }

// Unload the voice.
try await RunAnywhere.unloadTTSVoice()
Synthesis Metadata
Copy
Ask AI
let output = try await RunAnywhere.synthesize(text)
if let metadata = output.metadata {
    print("Voice: \(metadata.voice)")
    print("Processing time: \(metadata.processingTime)s")
    print("Characters: \(metadata.characterCount)")

    // Throughput is undefined when no measurable time elapsed. Skipping the
    // line avoids printing "inf chars/sec" (or "nan" when the count is also 0).
    if metadata.processingTime > 0 {
        let charsPerSecond = Double(metadata.characterCount) / metadata.processingTime
        print("Speed: \(String(format: "%.0f", charsPerSecond)) chars/sec")
    }
}
Error Handling
Copy
Ask AI
do {
    let output = try await RunAnywhere.synthesize(text)
    playAudio(output.audioData)
} catch let error as SDKError {
    // SDK-specific failures: branch on the error code.
    switch error.code {
    case .modelNotFound:
        print("Load a TTS voice first")
    case .processingFailed:
        print("Synthesis failed: \(error.message)")
    case .invalidInput:
        print("Invalid text input")
    default:
        print("Error: \(error.localizedDescription)")
    }
} catch {
    // Any error that is not an SDKError. Without this clause the do-catch is
    // not exhaustive: the snippet would compile only inside a throwing
    // context, and such errors would propagate unhandled.
    print("Unexpected error: \(error.localizedDescription)")
}
Best Practices
Chunk long text
For long text, use streaming synthesis or split into sentences to reduce latency.
Preload voices
Load TTS voices at app startup to avoid delays when synthesizing.
Handle audio session
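Configure `AVAudioSession` appropriately for your app’s audio needs (for example, set the `.playback` category and activate the session before playing synthesized speech).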