The transcribe() method converts audio data to text using on-device speech recognition models such as Whisper.
Basic Usage
// Simple transcription
let text = try await RunAnywhere.transcribe(audioData)
print("You said: \(text)")
Setup
Before transcribing, register the ONNX module and load an STT model:
import RunAnywhere
import ONNXRuntime

// Register the ONNX module (at app launch)
@MainActor
func setup() {
    ONNX.register()
}

// Load an STT model
try await RunAnywhere.loadSTTModel("whisper-base-onnx")
Method Signatures
Simple Transcription
public static func transcribe(_ audioData: Data) async throws -> String
Transcription with Options
public static func transcribeWithOptions(
    _ audioData: Data,
    options: STTOptions
) async throws -> STTOutput
Buffer Transcription
public static func transcribeBuffer(
    _ buffer: AVAudioPCMBuffer,
    language: String? = nil
) async throws -> STTOutput
Transcribes audio directly from an AVAudioPCMBuffer, with no intermediate Data conversion.
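A minimal sketch of calling it, assuming a capture pipeline (for example, an AVAudioEngine tap, not shown here) already delivers buffers in the required format:

import AVFoundation

// Hypothetical handler: `buffer` is assumed to come from an audio tap
// already configured for 16 kHz mono (see Audio Requirements below).
func handleCapturedBuffer(_ buffer: AVAudioPCMBuffer) async {
    do {
        let output = try await RunAnywhere.transcribeBuffer(buffer, language: "en")
        print("Transcribed: \(output.text)")
    } catch {
        print("Buffer transcription failed: \(error)")
    }
}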
Audio Requirements
| Property | Requirement |
|---|---|
| Sample Rate | 16,000 Hz (recommended) |
| Channels | Mono (1 channel) |
| Format | Float32 or Int16 PCM |
| Duration | Up to 30 seconds per call (Whisper limitation) |
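If captured audio does not already match these requirements, it can be converted first. The following is a minimal sketch (a helper of our own, not SDK API) that resamples an AVAudioPCMBuffer to 16 kHz mono Float32 using AVAudioConverter:

import AVFoundation

// Resample arbitrary PCM input to the recommended 16 kHz mono Float32 format.
func convertTo16kMono(_ input: AVAudioPCMBuffer) throws -> AVAudioPCMBuffer {
    guard let targetFormat = AVAudioFormat(
        commonFormat: .pcmFormatFloat32,
        sampleRate: 16_000,
        channels: 1,
        interleaved: false
    ), let converter = AVAudioConverter(from: input.format, to: targetFormat) else {
        throw NSError(domain: "AudioConversion", code: -1)
    }

    // Size the output buffer according to the sample-rate ratio.
    let ratio = targetFormat.sampleRate / input.format.sampleRate
    let capacity = AVAudioFrameCount(Double(input.frameLength) * ratio)
    guard let output = AVAudioPCMBuffer(pcmFormat: targetFormat, frameCapacity: capacity) else {
        throw NSError(domain: "AudioConversion", code: -2)
    }

    // Feed the single input buffer, then signal end of stream.
    var consumed = false
    var conversionError: NSError?
    let status = converter.convert(to: output, error: &conversionError) { _, outStatus in
        if consumed {
            outStatus.pointee = .endOfStream
            return nil
        }
        consumed = true
        outStatus.pointee = .haveData
        return input
    }
    if status == .error, let conversionError { throw conversionError }
    return output
}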
STTOutput
public struct STTOutput: Sendable {
    public let text: String                        // Transcribed text
    public let confidence: Float?                  // Confidence score (0-1)
    public let wordTimestamps: [WordTimestamp]?    // Per-word timing
    public let detectedLanguage: String?           // Detected language code
    public let alternatives: [STTAlternative]?     // Alternative transcriptions
    public let metadata: TranscriptionMetadata?    // Processing info
}
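All fields except text are optional, so callers should unwrap them before use. A small illustrative sketch (the 0.6 threshold is an arbitrary choice, not an SDK recommendation):

let output = try await RunAnywhere.transcribeWithOptions(audioData, options: STTOptions())

// Flag low-confidence results for review instead of trusting them blindly.
if let confidence = output.confidence, confidence < 0.6 {
    print("Low-confidence transcription (\(confidence)): \(output.text)")
} else {
    print(output.text)
}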
Examples
Recording and Transcribing
import AVFoundation

class AudioRecorder: ObservableObject {
    private var audioRecorder: AVAudioRecorder?
    private let audioSession = AVAudioSession.sharedInstance()

    func startRecording() async throws {
        try audioSession.setCategory(.playAndRecord, mode: .default)
        try audioSession.setActive(true)

        let url = FileManager.default.temporaryDirectory
            .appendingPathComponent("recording.wav")

        // 16 kHz mono 16-bit PCM matches the audio requirements above.
        let settings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 16000,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false
        ]
        audioRecorder = try AVAudioRecorder(url: url, settings: settings)
        audioRecorder?.record()
    }

    func stopAndTranscribe() async throws -> String {
        guard let recorder = audioRecorder else { return "" }
        recorder.stop()

        let audioData = try Data(contentsOf: recorder.url)
        return try await RunAnywhere.transcribe(audioData)
    }
}
With Timestamps
let output = try await RunAnywhere.transcribeWithOptions(
    audioData,
    options: STTOptions(
        language: "en",
        enableWordTimestamps: true
    )
)

print("Full text: \(output.text)")

if let timestamps = output.wordTimestamps {
    for word in timestamps {
        print("\(word.word): \(word.startTime)s - \(word.endTime)s")
    }
}
Multi-Language Support
// Auto-detect language
let output = try await RunAnywhere.transcribeWithOptions(
    audioData,
    options: STTOptions()
)
print("Detected language: \(output.detectedLanguage ?? "unknown")")

// Force a specific language
let spanishOutput = try await RunAnywhere.transcribeWithOptions(
    audioData,
    options: STTOptions(language: "es")
)
SwiftUI Voice Input
struct VoiceInputView: View {
    @StateObject private var recorder = AudioRecorder()
    @State private var transcription = ""
    @State private var isRecording = false
    @State private var isProcessing = false

    var body: some View {
        VStack(spacing: 20) {
            // Transcription display
            Text(transcription)
                .frame(maxWidth: .infinity, minHeight: 100, alignment: .topLeading)
                .padding()
                .background(Color.gray.opacity(0.1))
                .cornerRadius(12)

            // Record button
            Button(action: toggleRecording) {
                ZStack {
                    Circle()
                        .fill(isRecording ? Color.red : Color.blue)
                        .frame(width: 80, height: 80)

                    if isProcessing {
                        ProgressView()
                            .tint(.white)
                    } else {
                        Image(systemName: isRecording ? "stop.fill" : "mic.fill")
                            .font(.title)
                            .foregroundColor(.white)
                    }
                }
            }
            .disabled(isProcessing)

            Text(isRecording ? "Tap to stop" : "Tap to record")
                .font(.caption)
                .foregroundColor(.secondary)
        }
        .padding()
    }

    func toggleRecording() {
        if isRecording {
            stopRecording()
        } else {
            startRecording()
        }
    }

    func startRecording() {
        Task {
            do {
                try await recorder.startRecording()
                isRecording = true
            } catch {
                transcription = "Failed to start recording: \(error)"
            }
        }
    }

    func stopRecording() {
        isRecording = false
        isProcessing = true

        Task {
            do {
                let text = try await recorder.stopAndTranscribe()
                await MainActor.run {
                    transcription = text
                    isProcessing = false
                }
            } catch {
                await MainActor.run {
                    transcription = "Transcription failed: \(error)"
                    isProcessing = false
                }
            }
        }
    }
}
Model Management
// Load a model
try await RunAnywhere.loadSTTModel("whisper-base-onnx")
// Check if loaded
let isLoaded = await RunAnywhere.isSTTModelLoaded
// Get current model
let model = await RunAnywhere.currentSTTModel
// Unload when done
try await RunAnywhere.unloadSTTModel()
Available Models
| Model ID | Size | Quality | Speed |
|---|---|---|---|
| whisper-tiny-onnx | ~40MB | Good | Fastest |
| whisper-base-onnx | ~150MB | Better | Fast |
| whisper-small-onnx | ~500MB | Best | Slower |
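One way to apply this table is to try the highest-quality model first and fall back to smaller ones if loading fails. A hedged sketch (the fallback policy is illustrative, not an SDK feature):

// Illustrative policy: prefer quality, degrade gracefully on failure.
let candidates = ["whisper-small-onnx", "whisper-base-onnx", "whisper-tiny-onnx"]

for modelID in candidates {
    do {
        try await RunAnywhere.loadSTTModel(modelID)
        print("Loaded \(modelID)")
        break
    } catch {
        print("Could not load \(modelID), trying a smaller model: \(error)")
    }
}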
Error Handling
do {
    let text = try await RunAnywhere.transcribe(audioData)
    print(text)
} catch let error as SDKError {
    switch error.code {
    case .modelNotFound:
        print("Load an STT model first")
    case .emptyAudioBuffer:
        print("Audio data is empty")
    case .processingFailed:
        print("Transcription failed: \(error.message)")
    default:
        print("Error: \(error.localizedDescription)")
    }
}