> Documentation index: fetch the complete documentation index at https://docs.runanywhere.ai/llms.txt.
> Use that file to discover all available pages before exploring further.
The transcribe() method converts audio data to text using on-device speech recognition models like Whisper.
Basic Usage
// Simple transcription: hand the audio bytes to the SDK and get plain text back.
let text = try await RunAnywhere.transcribe(audioData)
print("You said: \(text)")
Setup
Before transcribing, register the ONNX module and load an STT model:
import RunAnywhere
import ONNXRuntime
// Register ONNX module (at app launch)

/// Registers the ONNX module with the SDK.
///
/// Call this once at app launch, before loading any STT model.
@MainActor
func setup() {
    ONNX.register()
}
// Load STT model (use the registered model ID)
try await RunAnywhere.loadSTTModel(modelId: "sherpa-onnx-whisper-tiny.en")
Method Signatures
Simple Transcription
/// Transcribes `audioData` and returns only the recognized text.
public static func transcribe(_ audioData: Data) async throws -> String
Returns just the transcribed text.
Transcription with Options
/// Transcribes `audioData` using the supplied `options`.
///
/// - Returns: An `STTOutput` carrying the text plus details such as
///   confidence and timestamps.
public static func transcribeWithOptions(
    _ audioData: Data,
    options: STTOptions
) async throws -> STTOutput
Returns detailed output including confidence and timestamps.
Buffer Transcription
/// Transcribes audio held in an `AVAudioPCMBuffer`.
///
/// - Parameters:
///   - buffer: The PCM buffer to transcribe.
///   - language: Optional language code; defaults to `nil`.
public static func transcribeBuffer(
    _ buffer: AVAudioPCMBuffer,
    language: String? = nil
) async throws -> STTOutput
Transcribe directly from an AVAudioPCMBuffer.
Audio Requirements
| Property | Requirement |
| --- | --- |
| Sample Rate | 16,000 Hz (recommended) |
| Channels | Mono (1 channel) |
| Format | Float32 or Int16 PCM |
| Duration | Up to 30 seconds per call (Whisper limitation) |
STTOutput
/// Result of a speech-to-text request.
public struct STTOutput: Sendable {
    /// Transcribed text.
    public let text: String
    /// Confidence score (0-1).
    public let confidence: Float?
    /// Per-word timing.
    public let wordTimestamps: [WordTimestamp]?
    /// Detected language code.
    public let detectedLanguage: String?
    /// Alternative transcriptions.
    public let alternatives: [STTAlternative]?
    /// Processing info.
    public let metadata: TranscriptionMetadata?
}
Examples
Recording and Transcribing
import AVFoundation
/// Records microphone audio to a temporary WAV file (16 kHz, mono, 16-bit
/// integer PCM) and transcribes the recording on demand.
///
/// NOTE: `ObservableObject` is declared in Combine; add `import Combine`
/// (or `import SwiftUI`) if it is not already in scope where this compiles.
final class AudioRecorder: ObservableObject {
    /// Failures raised by the recorder itself (as opposed to AVFoundation or the SDK).
    enum RecordingError: Error {
        /// `AVAudioRecorder.record()` reported that recording did not start.
        case failedToStart
    }

    private var audioRecorder: AVAudioRecorder?
    private let audioSession = AVAudioSession.sharedInstance()

    /// Activates the audio session and starts recording to `recording.wav`
    /// in the temporary directory.
    ///
    /// - Throws: Any error from configuring the session or creating the
    ///   recorder, or `RecordingError.failedToStart` when `record()` returns `false`.
    func startRecording() async throws {
        try audioSession.setCategory(.playAndRecord, mode: .default)
        try audioSession.setActive(true)

        let url = FileManager.default.temporaryDirectory
            .appendingPathComponent("recording.wav")

        // Matches the documented STT input: 16 kHz, mono, Int16 linear PCM.
        let settings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 16000,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false
        ]

        let recorder = try AVAudioRecorder(url: url, settings: settings)
        // `record()` reports failure through its Bool result, not by throwing;
        // surface it instead of silently producing an empty recording.
        guard recorder.record() else {
            throw RecordingError.failedToStart
        }
        audioRecorder = recorder
    }

    /// Stops the active recording and returns its transcription.
    ///
    /// - Returns: The transcribed text, or an empty string when no recording
    ///   is in progress.
    /// - Throws: Any error from reading the recorded file or from transcription.
    func stopAndTranscribe() async throws -> String {
        guard let recorder = audioRecorder else { return "" }
        recorder.stop()
        // Drop the finished recorder so a repeated call cannot re-transcribe a stale file.
        audioRecorder = nil

        let audioData = try Data(contentsOf: recorder.url)
        return try await RunAnywhere.transcribe(audioData)
    }
}
With Timestamps
// Request per-word timing alongside the text.
let output = try await RunAnywhere.transcribeWithOptions(
    audioData,
    options: STTOptions(
        language: "en",
        enableWordTimestamps: true
    )
)

print("Full text: \(output.text)")

// `wordTimestamps` is optional, so unwrap before iterating.
if let timestamps = output.wordTimestamps {
    for word in timestamps {
        print(" \(word.word): \(word.startTime)s - \(word.endTime)s")
    }
}
Multi-Language Support
// Auto-detect language
let output = try await RunAnywhere.transcribeWithOptions(
    audioData,
    options: STTOptions()
)
print("Detected language: \(output.detectedLanguage ?? "unknown")")

// Force specific language
let spanishOutput = try await RunAnywhere.transcribeWithOptions(
    audioData,
    options: STTOptions(language: "es")
)
/// Tap-to-record view: the first tap starts recording, the second tap stops
/// it and shows the transcription (or the error) in the text area.
///
/// Requires `import SwiftUI`.
struct VoiceInputView: View {
    @StateObject private var recorder = AudioRecorder()
    @State private var transcription = ""
    @State private var isRecording = false
    @State private var isProcessing = false

    var body: some View {
        VStack(spacing: 20) {
            // Transcription display
            Text(transcription)
                .frame(maxWidth: .infinity, minHeight: 100, alignment: .topLeading)
                .padding()
                .background(Color.gray.opacity(0.1))
                .cornerRadius(12)

            // Record button
            Button(action: toggleRecording) {
                ZStack {
                    Circle()
                        .fill(isRecording ? Color.red : Color.blue)
                        .frame(width: 80, height: 80)

                    if isProcessing {
                        ProgressView()
                            .tint(.white)
                    } else {
                        Image(systemName: isRecording ? "stop.fill" : "mic.fill")
                            .font(.title)
                            .foregroundColor(.white)
                    }
                }
            }
            // Block taps while a transcription is still in flight.
            .disabled(isProcessing)

            Text(isRecording ? "Tap to stop" : "Tap to record")
                .font(.caption)
                .foregroundColor(.secondary)
        }
        .padding()
    }

    /// Starts or stops recording depending on the current state.
    func toggleRecording() {
        if isRecording {
            stopRecording()
        } else {
            startRecording()
        }
    }

    /// Starts recording; on failure the error is shown in the text area.
    func startRecording() {
        // Run on the main actor so every `@State` write happens there,
        // consistently with `stopRecording()`.
        Task { @MainActor in
            do {
                try await recorder.startRecording()
                isRecording = true
            } catch {
                transcription = "Failed to start recording: \(error)"
            }
        }
    }

    /// Stops recording and replaces the displayed text with the transcription
    /// (or with the failure description).
    func stopRecording() {
        isRecording = false
        isProcessing = true

        Task { @MainActor in
            // Clear the spinner on both the success and the failure path.
            defer { isProcessing = false }
            do {
                transcription = try await recorder.stopAndTranscribe()
            } catch {
                transcription = "Transcription failed: \(error)"
            }
        }
    }
}
Model Management
// Load a model
try await RunAnywhere.loadSTTModel(modelId: "sherpa-onnx-whisper-tiny.en")

// Check if loaded
let isLoaded = await RunAnywhere.isSTTModelLoaded

// Get current model
let model = await RunAnywhere.currentSTTModel

// Unload when done
try await RunAnywhere.unloadSTTModel()
Available Models
These are the Sherpa-ONNX Whisper models available as tar.gz archives:
| Model ID | Size | Quality | Speed |
| --- | --- | --- | --- |
| sherpa-onnx-whisper-tiny.en | ~40MB | Good | Fastest |
| sherpa-onnx-whisper-base.en | ~150MB | Better | Fast |
| sherpa-onnx-whisper-small.en | ~500MB | Best | Slower |
Register models with `framework: .onnx`, `modality: .speechRecognition`, and `artifactType: .archive(.tarGz, structure: .nestedDirectory)`.
Error Handling
do {
    let text = try await RunAnywhere.transcribe(audioData)
    print(text)
} catch let error as SDKError {
    // SDK failures carry a `code` that identifies what went wrong.
    switch error.code {
    case .modelNotFound:
        print("Load an STT model first")
    case .emptyAudioBuffer:
        print("Audio data is empty")
    case .processingFailed:
        print("Transcription failed: \(error.message)")
    default:
        print("Error: \(error.localizedDescription)")
    }
} catch {
    // Anything that is not an `SDKError` lands here, which keeps the
    // do/catch exhaustive so the enclosing scope does not have to rethrow.
    print("Error: \(error.localizedDescription)")
}
Streaming STT: Real-time transcription →
STT Options: Configure transcription →