Basic Usage
let output = try await RunAnywhere.transcribeStream(
    audioData: audioData,
    options: STTOptions(language: "en"),
    onPartialResult: { result in
        print("Partial: \(result.transcript)")
    }
)
print("Final: \(output.text)")
Method Signature
public static func transcribeStream(
    audioData: Data,
    options: STTOptions = STTOptions(),
    onPartialResult: @escaping (STTTranscriptionResult) -> Void
) async throws -> STTOutput
Parameters
| Parameter | Type | Description |
|---|---|---|
| audioData | Data | Audio data to transcribe |
| options | STTOptions | Transcription options |
| onPartialResult | (STTTranscriptionResult) -> Void | Callback for partial results |
Returns
An STTOutput containing the final, complete transcription.
STTTranscriptionResult
public struct STTTranscriptionResult: Sendable {
    public let transcript: String               // Current transcription
    public let confidence: Float?               // Confidence score
    public let timestamps: [WordTimestamp]?     // Word-level timing
    public let language: String?                // Detected language
    public let alternatives: [STTAlternative]?  // Alternative transcriptions
}
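For example, a partial-result callback can use these fields to filter what it surfaces (a minimal sketch; the 0.6 confidence threshold is an arbitrary assumption, not an SDK recommendation):

let output = try await RunAnywhere.transcribeStream(
    audioData: audioData,
    options: STTOptions(language: "en"),
    onPartialResult: { result in
        // confidence may be nil if the model does not report scores
        guard (result.confidence ?? 1.0) >= 0.6 else { return }
        print("Partial (\(result.language ?? "unknown")): \(result.transcript)")
        if let words = result.timestamps {
            print("Word-level timing available for \(words.count) words")
        }
    }
)
print("Final: \(output.text)")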
Live Transcription Example
import AVFoundation
import Combine

class LiveTranscriber: ObservableObject {
    @Published var partialText = ""
    @Published var finalText = ""
    @Published var isTranscribing = false

    private var audioEngine: AVAudioEngine?
    private var audioBuffer = Data()

    func startLiveTranscription() async throws {
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.playAndRecord, mode: .measurement)
        try audioSession.setActive(true)

        audioEngine = AVAudioEngine()
        guard let audioEngine = audioEngine else { return }

        let inputNode = audioEngine.inputNode
        // Note: some devices only support tapping at the hardware format;
        // if installTap fails, tap at inputNode.outputFormat(forBus: 0)
        // and convert to 16 kHz with AVAudioConverter.
        let recordingFormat = AVAudioFormat(
            commonFormat: .pcmFormatFloat32,
            sampleRate: 16000,
            channels: 1,
            interleaved: false
        )!

        isTranscribing = true

        inputNode.installTap(onBus: 0, bufferSize: 4096, format: recordingFormat) { [weak self] buffer, _ in
            self?.processAudioBuffer(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()
    }

    private func processAudioBuffer(_ buffer: AVAudioPCMBuffer) {
        // Convert the buffer to Data and accumulate
        guard let channelData = buffer.floatChannelData?[0] else { return }
        let frameCount = Int(buffer.frameLength)
        let data = Data(bytes: channelData, count: frameCount * MemoryLayout<Float>.size)
        audioBuffer.append(data)

        // Process every ~1 second of audio
        if audioBuffer.count >= 16000 * 4 { // 1 second at 16 kHz, Float32 (64 KB)
            let chunk = audioBuffer
            audioBuffer = Data()
            Task {
                await transcribeChunk(chunk)
            }
        }
    }

    private func transcribeChunk(_ data: Data) async {
        do {
            let output = try await RunAnywhere.transcribeStream(
                audioData: data,
                options: STTOptions(language: "en"),
                onPartialResult: { [weak self] result in
                    Task { @MainActor in
                        self?.partialText = result.transcript
                    }
                }
            )
            await MainActor.run {
                self.finalText += " " + output.text
                self.partialText = ""
            }
        } catch {
            print("Transcription error: \(error)")
        }
    }

    func stop() {
        audioEngine?.stop()
        audioEngine?.inputNode.removeTap(onBus: 0)
        audioEngine = nil
        isTranscribing = false
    }
}
SwiftUI Integration
import SwiftUI

struct LiveTranscriptionView: View {
    @StateObject private var transcriber = LiveTranscriber()

    var body: some View {
        VStack(spacing: 20) {
            // Final transcription
            ScrollView {
                Text(transcriber.finalText)
                    .frame(maxWidth: .infinity, alignment: .leading)
            }
            .frame(height: 200)
            .padding()
            .background(Color.gray.opacity(0.1))
            .cornerRadius(12)

            // Partial (in-progress) transcription
            if !transcriber.partialText.isEmpty {
                Text(transcriber.partialText)
                    .foregroundColor(.secondary)
                    .italic()
            }

            // Recording indicator
            HStack {
                if transcriber.isTranscribing {
                    Circle()
                        .fill(Color.red)
                        .frame(width: 12, height: 12)
                    Text("Listening...")
                }
            }

            // Control buttons
            HStack(spacing: 20) {
                Button(action: startTranscription) {
                    Label("Start", systemImage: "mic.fill")
                        .padding()
                        .background(Color.green)
                        .foregroundColor(.white)
                        .cornerRadius(10)
                }
                .disabled(transcriber.isTranscribing)

                Button(action: stopTranscription) {
                    Label("Stop", systemImage: "stop.fill")
                        .padding()
                        .background(Color.red)
                        .foregroundColor(.white)
                        .cornerRadius(10)
                }
                .disabled(!transcriber.isTranscribing)
            }
        }
        .padding()
    }

    func startTranscription() {
        Task {
            do {
                try await transcriber.startLiveTranscription()
            } catch {
                print("Failed to start transcription: \(error)")
            }
        }
    }

    func stopTranscription() {
        transcriber.stop()
    }
}
Voice Command Detection
Use streaming STT to detect commands in real time:
class VoiceCommandDetector {
    private let commands = ["play", "pause", "stop", "next", "previous"]
    private var lastDetectedCommand: String?

    var onCommandDetected: ((String) -> Void)?

    func processAudio(_ data: Data) async {
        lastDetectedCommand = nil // allow re-detection in each new chunk
        do {
            _ = try await RunAnywhere.transcribeStream(
                audioData: data,
                options: STTOptions(language: "en"),
                onPartialResult: { [weak self] result in
                    self?.checkForCommands(result.transcript)
                }
            )
        } catch {
            print("Error: \(error)")
        }
    }

    private func checkForCommands(_ text: String) {
        let lowercased = text.lowercased()
        for command in commands where lowercased.contains(command) {
            // Partial transcripts are cumulative, so skip a command
            // already detected in this chunk to avoid repeated triggers.
            guard command != lastDetectedCommand else { continue }
            lastDetectedCommand = command
            onCommandDetected?(command)
            break
        }
    }
}
// Usage (assumes a `player` object, e.g. an AVPlayer, already in scope)
let detector = VoiceCommandDetector()
detector.onCommandDetected = { command in
    switch command {
    case "play":
        player.play()
    case "pause":
        player.pause()
    default:
        break
    }
}
Performance Tips
Chunk size
Process audio in chunks of 1-2 seconds for the best balance of latency and accuracy.
Buffer management
Clear audio buffers after processing to prevent memory growth during long sessions.
Background processing
Transcription runs on background threads automatically; update the UI on the MainActor.
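Putting these tips together, a chunking helper might look like this (a minimal sketch; the 1-second chunk size and 10-second cap are assumptions to tune for your app):

import Foundation

// Accumulates 16 kHz Float32 PCM and emits ~1-second chunks.
final class AudioChunker {
    private var buffer = Data()
    private let chunkBytes = 16_000 * MemoryLayout<Float>.size      // ~1 second
    private let maxBytes = 10 * 16_000 * MemoryLayout<Float>.size   // hard cap: ~10 seconds

    var onChunk: ((Data) -> Void)?

    func append(_ data: Data) {
        buffer.append(data)
        // Emit full chunks and remove them from the buffer,
        // keeping memory bounded during long sessions.
        while buffer.count >= chunkBytes {
            onChunk?(buffer.prefix(chunkBytes))
            buffer.removeFirst(chunkBytes)
        }
        // Safety valve: drop stale audio if the consumer falls behind.
        if buffer.count > maxBytes {
            buffer.removeAll()
        }
    }
}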
Error Handling
do {
    _ = try await RunAnywhere.transcribeStream(
        audioData: data,
        onPartialResult: { _ in }
    )
} catch let error as SDKError {
    switch error.code {
    case .modelNotFound:
        print("Load an STT model first")
    case .emptyAudioBuffer:
        print("No audio data")
    case .processingFailed:
        print("Streaming failed: \(error.message)")
    default:
        print("Error: \(error)")
    }
}
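Because .processingFailed can be transient, a simple retry wrapper may help (a minimal sketch; whether and how often to retry depends on your app):

func transcribeWithRetry(_ data: Data, retries: Int = 1) async throws -> STTOutput {
    for _ in 0..<retries {
        do {
            return try await RunAnywhere.transcribeStream(
                audioData: data,
                onPartialResult: { _ in }
            )
        } catch let error as SDKError where error.code == .processingFailed {
            continue // transient streaming failure; try again
        }
    }
    // Final attempt propagates any remaining error to the caller
    return try await RunAnywhere.transcribeStream(
        audioData: data,
        onPartialResult: { _ in }
    )
}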