Skip to main content
Synthesize text into audio using on-device neural TTS models.

Basic Synthesis

// A TTS voice has to be loaded before synthesizing
RunAnywhere.loadTTSVoice("en-us-default")

// Turn the text into audio using explicit rate and pitch
val synthesisOptions = TTSOptions(rate = 1.0f, pitch = 1.0f)
val output = RunAnywhere.synthesize(
    text = "Hello, welcome to RunAnywhere!",
    options = synthesisOptions,
)

// Build a track from the synthesized format and bytes, then start playback
createAudioTrack(output.format, output.audioData).play()

Simple Speak API

For convenience, use speak(), which handles both synthesis and playback:
// speak() synthesizes the text and plays it back in a single call
val speakOptions = TTSOptions(rate = 1.0f)
val result = RunAnywhere.speak(
    text = "Hello, this is a test message",
    options = speakOptions,
)

// Report the duration of the spoken audio
println("Duration: ${result.duration}s")

TTSOutput

The synthesis result object:
| Property | Type | Description |
| --- | --- | --- |
| audioData | ByteArray | Synthesized audio bytes |
| format | AudioFormat | Audio format (PCM, WAV, etc.) |
| duration | Double | Duration in seconds |
| phonemeTimestamps | List<TTSPhonemeTimestamp>? | Phoneme timing |
| metadata | TTSSynthesisMetadata | Processing metrics |

TTSOptions

Customize voice synthesis:
/**
 * Options that customize voice synthesis.
 *
 * @property voice Voice ID; when `null`, the loaded voice is used.
 * @property language Language code.
 * @property rate Speed, in the range 0.0-2.0.
 * @property pitch Pitch, in the range 0.0-2.0.
 * @property volume Volume, in the range 0.0-1.0.
 * @property audioFormat Audio format requested for the synthesized output.
 * @property sampleRate Sample rate for the synthesized output (presumably in Hz — confirm).
 * @property useSSML Enables SSML markup.
 */
data class TTSOptions(
    val voice: String? = null,
    val language: String = "en-US",
    val rate: Float = 1.0f,
    val pitch: Float = 1.0f,
    val volume: Float = 1.0f,
    val audioFormat: AudioFormat = AudioFormat.PCM,
    val sampleRate: Int = 22050,
    val useSSML: Boolean = false,
)

Example: Custom Voice Settings

// Slower, higher-pitched and slightly quieter than the TTSOptions defaults
val customSettings = TTSOptions(rate = 0.8f, pitch = 1.2f, volume = 0.9f)

val output = RunAnywhere.synthesize(
    text = "The quick brown fox jumps over the lazy dog.",
    options = customSettings,
)

Example: Playing Audio with AudioTrack

/**
 * Plays a buffer of raw 16-bit mono PCM audio through an [AudioTrack] in static mode.
 *
 * Playback is started and the function returns without waiting for it to finish.
 * The track releases itself once the end of the written data has been played, and
 * it is released immediately if writing the data or starting playback fails.
 *
 * @param audioData Raw PCM bytes (16-bit samples, single channel). An empty buffer is a no-op.
 * @param sampleRate Sample rate passed to the track's audio format.
 * @throws IllegalStateException if the audio data could not be written to the track.
 */
suspend fun playAudio(audioData: ByteArray, sampleRate: Int = 22050) {
    // A non-positive static buffer size is rejected by the builder, and there is nothing to play.
    if (audioData.isEmpty()) return

    val audioTrack = AudioTrack.Builder()
        .setAudioAttributes(
            AudioAttributes.Builder()
                .setUsage(AudioAttributes.USAGE_MEDIA)
                .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
                .build()
        )
        .setAudioFormat(
            android.media.AudioFormat.Builder()
                .setEncoding(android.media.AudioFormat.ENCODING_PCM_16BIT)
                .setSampleRate(sampleRate)
                .setChannelMask(android.media.AudioFormat.CHANNEL_OUT_MONO)
                .build()
        )
        .setBufferSizeInBytes(audioData.size)
        .setTransferMode(AudioTrack.MODE_STATIC)
        .build()

    try {
        // write() reports failures as negative error codes rather than by throwing.
        val bytesWritten = audioTrack.write(audioData, 0, audioData.size)
        check(bytesWritten >= 0) { "AudioTrack.write failed with error code $bytesWritten" }

        // 16-bit mono PCM: one frame is two bytes. The marker sits at the end of the data
        // so the track can be released as soon as playback has consumed everything.
        audioTrack.setNotificationMarkerPosition(bytesWritten / 2)
        audioTrack.setPlaybackPositionUpdateListener(
            object : AudioTrack.OnPlaybackPositionUpdateListener {
                override fun onMarkerReached(track: AudioTrack) {
                    track.release()
                }

                override fun onPeriodicNotification(track: AudioTrack) = Unit
            }
        )

        audioTrack.play()
    } catch (e: Exception) {
        // Playback never started, so the end marker will not fire; free the track here.
        audioTrack.release()
        throw e
    }
}

Example: Text-to-Speech ViewModel

/**
 * ViewModel that drives text-to-speech and exposes whether a speak request is in progress.
 *
 * Loading of the "en-us-default" voice is launched when the ViewModel is created;
 * [speak] waits for that load to finish before requesting speech.
 */
class TTSViewModel : ViewModel() {
    private val _isSpeaking = MutableStateFlow(false)

    /** `true` from the moment a [speak] request starts until it ends or [stopSpeaking] is called. */
    val isSpeaking: StateFlow<Boolean> = _isSpeaking

    // The load is kept as a Job so speak() can wait for it instead of racing it.
    private val voiceLoad = viewModelScope.launch {
        RunAnywhere.loadTTSVoice("en-us-default")
    }

    /**
     * Speaks [text] aloud, updating [isSpeaking] for the duration of the request.
     *
     * @param text The text to speak.
     */
    fun speak(text: String) {
        viewModelScope.launch {
            // The voice must be loaded before speaking; without this, an early call
            // could reach the SDK while the load launched at construction is still running.
            voiceLoad.join()

            _isSpeaking.value = true
            try {
                RunAnywhere.speak(text, TTSOptions(rate = 1.0f))
            } finally {
                // Reset the flag on failure or cancellation as well as on success.
                _isSpeaking.value = false
            }
        }
    }

    /** Stops the current speech and clears [isSpeaking]. */
    fun stopSpeaking() {
        viewModelScope.launch {
            RunAnywhere.stopSpeaking()
            _isSpeaking.value = false
        }
    }
}

Check Speaking Status

// Query whether speech is currently in progress
val isSpeaking = RunAnywhere.isSpeaking()

// Stop the current playback
// NOTE(review): the ViewModel example calls stopSpeaking() from inside a coroutine —
// confirm whether it is a suspend function before calling it from regular code
RunAnywhere.stopSpeaking()

Synthesis Metadata

val output = RunAnywhere.synthesize(text, options)

// Read the metadata once, then print each processing metric
val metadata = output.metadata
println("Voice: ${metadata.voice}")
println("Language: ${metadata.language}")
println("Processing time: ${metadata.processingTime}s")
println("Characters: ${metadata.characterCount}")
println("Speed: ${metadata.charactersPerSecond} char/s")