/// Options that configure a speech-to-text transcription request.
public struct STTOptions: Sendable {
    /// Language of the audio, given as a code such as `"en"`.
    public let language: String

    /// Sample rate of the input audio, in Hz.
    public let sampleRate: Int

    /// Whether the transcription result should include per-word timestamps.
    public let enableWordTimestamps: Bool

    /// Whether voice activity detection (VAD) is applied to the audio.
    public let enableVAD: Bool

    /// Creates a set of transcription options.
    ///
    /// - Parameters:
    ///   - language: Language of the audio. Defaults to `"en"`.
    ///   - sampleRate: Sample rate of the input audio in Hz. Defaults to `16000`.
    ///   - enableWordTimestamps: Whether to request per-word timestamps. Defaults to `false`.
    ///   - enableVAD: Whether to apply voice activity detection. Defaults to `true`.
    public init(
        language: String = "en",
        sampleRate: Int = 16000,
        enableWordTimestamps: Bool = false,
        enableVAD: Bool = true
    ) {
        self.language = language
        self.sampleRate = sampleRate
        self.enableWordTimestamps = enableWordTimestamps
        self.enableVAD = enableVAD
    }
}
/// Timing information for a single transcribed word.
public struct WordTimestamp: Sendable {
    /// The word.
    public let word: String

    /// Start time in seconds.
    public let startTime: Double

    /// End time in seconds.
    public let endTime: Double

    /// Word-level confidence; optional.
    public let confidence: Float?
}
/// Transcribes `audioData` with word-level timestamps enabled and groups the
/// resulting words into subtitle segments.
///
/// - Parameters:
///   - audioData: The audio to transcribe.
///   - maxWordsPerSegment: Maximum number of words placed in one subtitle.
///     Defaults to 7; values below 1 are treated as 1.
/// - Returns: Subtitles in the order the words were reported. When the
///   transcription output carries no word timestamps, a single subtitle
///   containing the full text and spanning 0 to 5 is returned instead.
/// - Throws: Any error thrown by `RunAnywhere.transcribeWithOptions(_:options:)`.
func generateSubtitles(
    from audioData: Data,
    maxWordsPerSegment: Int = 7
) async throws -> [Subtitle] {
    let options = STTOptions(
        language: "en",
        enableWordTimestamps: true
    )
    let output = try await RunAnywhere.transcribeWithOptions(audioData, options: options)

    guard let timestamps = output.wordTimestamps else {
        // No per-word timing is available, so fall back to one subtitle
        // holding the whole transcript with a fixed placeholder interval.
        return [Subtitle(text: output.text, start: 0, end: 5)]
    }

    let segmentSize = max(1, maxWordsPerSegment)
    var subtitles: [Subtitle] = []
    var pendingWords: [WordTimestamp] = []

    // Emits the pending words as one subtitle that runs from the first word's
    // start to the last word's end, then clears the buffer. Does nothing when
    // the buffer is empty, which removes the need for force-unwrapping.
    func flushPendingWords() {
        guard let first = pendingWords.first, let last = pendingWords.last else { return }
        let text = pendingWords.map(\.word).joined(separator: " ")
        subtitles.append(Subtitle(text: text, start: first.startTime, end: last.endTime))
        pendingWords.removeAll(keepingCapacity: true)
    }

    for word in timestamps {
        pendingWords.append(word)
        if pendingWords.count >= segmentSize {
            flushPendingWords()
        }
    }
    // Emit the trailing partial segment, if any.
    flushPendingWords()

    return subtitles
}

/// A single caption and the interval during which it is shown.
struct Subtitle {
    /// Caption text.
    let text: String

    /// Time at which the caption starts.
    let start: Double

    /// Time at which the caption ends.
    let end: Double
}
Voice activity detection (VAD) filters out silent segments before transcription:
// With VAD (default) - silent parts are skipped
let withVAD = STTOptions(enableVAD: true)

// Without VAD - entire audio is processed
let withoutVAD = STTOptions(enableVAD: false)
Enable VAD for real-time transcription to reduce processing time and improve accuracy by ignoring
silence.
The SDK expects audio at 16kHz by default. If your audio has a different sample rate, specify it:
// For 44.1kHz audio (resampling will be applied)
let options44kHz = STTOptions(sampleRate: 44100)

// For 48kHz audio
let options48kHz = STTOptions(sampleRate: 48000)
For best results, record audio at 16kHz mono. While the SDK can resample, native 16kHz audio
produces better accuracy.