Stream transcription delivers partial results in real time while audio is still being processed, enabling responsive voice interfaces such as live captions and voice commands.

Basic Usage

let output = try await RunAnywhere.transcribeStream(
    audioData: audioData,
    options: STTOptions(language: "en"),
    onPartialResult: { result in
        print("Partial: \(result.transcript)")
    }
)

print("Final: \(output.text)")

Method Signature

public static func transcribeStream(
    audioData: Data,
    options: STTOptions = STTOptions(),
    onPartialResult: @escaping (STTTranscriptionResult) -> Void
) async throws -> STTOutput

Parameters

| Parameter | Type | Description |
|---|---|---|
| audioData | Data | Audio data to transcribe |
| options | STTOptions | Transcription options |
| onPartialResult | (STTTranscriptionResult) -> Void | Callback for partial results |

Returns

STTOutput with the final complete transcription.

STTTranscriptionResult

public struct STTTranscriptionResult: Sendable {
    public let transcript: String           // Current transcription
    public let confidence: Float?           // Confidence score
    public let timestamps: [WordTimestamp]? // Word-level timing
    public let language: String?            // Detected language
    public let alternatives: [STTAlternative]? // Alternative transcriptions
}
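
Each partial callback receives the full current hypothesis, so the optional fields can be inspected on every update. A minimal sketch (the 0.5 confidence threshold is just an illustrative value, not an SDK recommendation):

let output = try await RunAnywhere.transcribeStream(
    audioData: audioData,
    options: STTOptions(language: "en"),
    onPartialResult: { result in
        // Flag low-confidence hypotheses; confidence may be nil
        if let confidence = result.confidence, confidence < 0.5 {
            print("(uncertain) \(result.transcript)")
        } else {
            print(result.transcript)
        }

        // Word-level timing is only present when the loaded model provides it
        if let timestamps = result.timestamps {
            print("Received \(timestamps.count) word timestamps")
        }
    }
)
print("Final: \(output.text)")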

Live Transcription Example

import AVFoundation
import Combine

class LiveTranscriber: ObservableObject {
    @Published var partialText = ""
    @Published var finalText = ""
    @Published var isTranscribing = false

    private var audioEngine: AVAudioEngine?
    private var audioBuffer = Data()
    private var converter: AVAudioConverter?

    // Target format for transcription: 16 kHz, mono, Float32
    private let targetFormat = AVAudioFormat(
        commonFormat: .pcmFormatFloat32,
        sampleRate: 16000,
        channels: 1,
        interleaved: false
    )!

    func startLiveTranscription() async throws {
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.playAndRecord, mode: .measurement)
        try audioSession.setActive(true)

        audioEngine = AVAudioEngine()
        guard let audioEngine = audioEngine else { return }

        let inputNode = audioEngine.inputNode

        // The tap must use the input node's native format; requesting a
        // mismatched sample rate raises a runtime exception. Audio is
        // resampled to 16 kHz afterwards with an AVAudioConverter.
        let inputFormat = inputNode.outputFormat(forBus: 0)
        converter = AVAudioConverter(from: inputFormat, to: targetFormat)

        await MainActor.run { isTranscribing = true }

        inputNode.installTap(onBus: 0, bufferSize: 4096, format: inputFormat) { [weak self] buffer, _ in
            self?.processAudioBuffer(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()
    }

    private func processAudioBuffer(_ buffer: AVAudioPCMBuffer) {
        // Resample the hardware buffer to 16 kHz mono Float32
        guard let converter = converter else { return }

        let ratio = targetFormat.sampleRate / buffer.format.sampleRate
        let capacity = AVAudioFrameCount((Double(buffer.frameLength) * ratio).rounded(.up))
        guard let converted = AVAudioPCMBuffer(pcmFormat: targetFormat, frameCapacity: capacity) else { return }

        var consumed = false
        var conversionError: NSError?
        let status = converter.convert(to: converted, error: &conversionError) { _, outStatus in
            if consumed {
                outStatus.pointee = .noDataNow
                return nil
            }
            consumed = true
            outStatus.pointee = .haveData
            return buffer
        }
        guard status != .error, conversionError == nil,
              let channelData = converted.floatChannelData?[0] else { return }

        // Accumulate the converted samples
        let frameCount = Int(converted.frameLength)
        audioBuffer.append(Data(bytes: channelData, count: frameCount * MemoryLayout<Float>.size))

        // Process every ~1 second of audio (16,000 Float32 samples = 64 KB)
        if audioBuffer.count >= 16000 * MemoryLayout<Float>.size {
            let chunk = audioBuffer
            audioBuffer = Data()

            Task {
                await transcribeChunk(chunk)
            }
        }
    }

    private func transcribeChunk(_ data: Data) async {
        do {
            let output = try await RunAnywhere.transcribeStream(
                audioData: data,
                options: STTOptions(language: "en"),
                onPartialResult: { [weak self] result in
                    Task { @MainActor in
                        self?.partialText = result.transcript
                    }
                }
            )

            await MainActor.run {
                finalText += " " + output.text
                partialText = ""
            }
        } catch {
            print("Transcription error: \(error)")
        }
    }

    func stop() {
        audioEngine?.stop()
        audioEngine?.inputNode.removeTap(onBus: 0)
        audioEngine = nil
        isTranscribing = false
    }
}
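
The example above assumes the app already has microphone access. Recording requires an NSMicrophoneUsageDescription entry in Info.plist and an explicit permission request; a small helper (not part of the SDK) might look like this:

import AVFoundation

// Ask for microphone access before calling startLiveTranscription().
func requestMicrophoneAccess() async -> Bool {
    await withCheckedContinuation { continuation in
        AVAudioSession.sharedInstance().requestRecordPermission { granted in
            continuation.resume(returning: granted)
        }
    }
}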

SwiftUI Integration

import SwiftUI

struct LiveTranscriptionView: View {
    @StateObject private var transcriber = LiveTranscriber()

    var body: some View {
        VStack(spacing: 20) {
            // Final transcription
            ScrollView {
                Text(transcriber.finalText)
                    .frame(maxWidth: .infinity, alignment: .leading)
            }
            .frame(height: 200)
            .padding()
            .background(Color.gray.opacity(0.1))
            .cornerRadius(12)

            // Partial (in-progress) transcription
            if !transcriber.partialText.isEmpty {
                Text(transcriber.partialText)
                    .foregroundColor(.secondary)
                    .italic()
            }

            // Recording indicator
            HStack {
                if transcriber.isTranscribing {
                    Circle()
                        .fill(Color.red)
                        .frame(width: 12, height: 12)
                    Text("Listening...")
                }
            }

            // Control buttons
            HStack(spacing: 20) {
                Button(action: startTranscription) {
                    Label("Start", systemImage: "mic.fill")
                        .padding()
                        .background(Color.green)
                        .foregroundColor(.white)
                        .cornerRadius(10)
                }
                .disabled(transcriber.isTranscribing)

                Button(action: stopTranscription) {
                    Label("Stop", systemImage: "stop.fill")
                        .padding()
                        .background(Color.red)
                        .foregroundColor(.white)
                        .cornerRadius(10)
                }
                .disabled(!transcriber.isTranscribing)
            }
        }
        .padding()
    }

    func startTranscription() {
        Task {
            do {
                try await transcriber.startLiveTranscription()
            } catch {
                print("Failed to start transcription: \(error)")
            }
        }
    }

    func stopTranscription() {
        transcriber.stop()
    }
}

Voice Command Detection

Use streaming STT to detect commands in real-time:
class VoiceCommandDetector {
    private let commands = ["play", "pause", "stop", "next", "previous"]
    var onCommandDetected: ((String) -> Void)?

    func processAudio(_ data: Data) async {
        do {
            let _ = try await RunAnywhere.transcribeStream(
                audioData: data,
                options: STTOptions(language: "en"),
                onPartialResult: { [weak self] result in
                    self?.checkForCommands(result.transcript)
                }
            )
        } catch {
            print("Error: \(error)")
        }
    }

    private func checkForCommands(_ text: String) {
        let lowercased = text.lowercased()
        for command in commands {
            if lowercased.contains(command) {
                onCommandDetected?(command)
                break
            }
        }
    }
}

// Usage (`player` below stands for your app's playback controller)
let detector = VoiceCommandDetector()
detector.onCommandDetected = { command in
    switch command {
    case "play":
        player.play()
    case "pause":
        player.pause()
    default:
        break
    }
}
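
Substring matching can misfire on longer words ("playground" contains "play"). A stricter variant, sketched below, splits the transcript into words before matching:

// Drop-in replacement for checkForCommands that matches whole words only
private func checkForCommands(_ text: String) {
    let words = text.lowercased()
        .components(separatedBy: CharacterSet.alphanumerics.inverted)
    if let command = commands.first(where: { words.contains($0) }) {
        onCommandDetected?(command)
    }
}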

Performance Tips

- Process audio in chunks of 1-2 seconds for the best balance of latency and accuracy (see the sketch below).
- Clear audio buffers after processing to prevent memory growth during long sessions.
- Transcription runs on background threads automatically; update UI state on the MainActor.
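
A minimal sketch of the chunking check from LiveTranscriber above, with the chunk duration pulled out as a constant (1.5 s is just an illustrative value within the 1-2 s range):

// Assumes 16 kHz mono Float32 audio, as in the examples above
let sampleRate = 16_000.0
let chunkSeconds = 1.5
let chunkBytes = Int(sampleRate * chunkSeconds) * MemoryLayout<Float>.size

if audioBuffer.count >= chunkBytes {
    let chunk = audioBuffer
    audioBuffer = Data()   // clear the buffer to avoid unbounded growth
    Task { await transcribeChunk(chunk) }
}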

Error Handling

do {
    let output = try await RunAnywhere.transcribeStream(
        audioData: data,
        onPartialResult: { _ in }
    )
    print(output.text)
} catch let error as SDKError {
    switch error.code {
    case .modelNotFound:
        print("Load STT model first")
    case .emptyAudioBuffer:
        print("No audio data")
    case .processingFailed:
        print("Streaming failed: \(error.message)")
    default:
        print("Error: \(error)")
    }
} catch {
    print("Unexpected error: \(error)")
}