Documentation index: fetch the complete documentation index at https://docs.runanywhere.ai/llms.txt
Use that file to discover all available pages before exploring further.
Follow these best practices to build high-performance, reliable apps with the RunAnywhere SDK.
Initialization
Initialize Early
Initialize the SDK at app launch, not when first needed. The correct order is: initialize → register backends → register models.
/// App entry point that boots the RunAnywhere SDK as soon as the app launches.
///
/// Setup order matters: initialize the SDK, then register backends, then register models.
@main
struct MyApp: App {
    init() {
        Task { @MainActor in
            do {
                // 1. Initialize SDK first
                try RunAnywhere.initialize(environment: .development)
            } catch {
                // Surface the failure instead of swallowing it with `try?`, and stop here:
                // registering backends and models only makes sense after a successful initialize.
                print("RunAnywhere SDK initialization failed: \(error)")
                return
            }
            // 2. Register backend modules
            LlamaCPP.register()
            ONNX.register()
            // 3. Register models
            ModelService.registerDefaultModels()
            print("SDK v \(RunAnywhere.version) initialized")
        }
    }

    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}
Pre-load Models
Load models during onboarding or splash screen:
/// Splash screen that pre-loads the LLM, STT and TTS models while showing progress.
///
/// `isReady` is only set once all three models have loaded; if any load throws,
/// the remaining loads are skipped and the failure is shown in place of the loading text.
struct SplashView: View {
    @State private var loadingProgress = 0.0
    @State private var isReady = false
    /// Message shown when a model fails to load; `nil` while loading is healthy.
    @State private var loadError: String?

    var body: some View {
        VStack {
            ProgressView(value: loadingProgress)
            Text(loadError ?? "Loading AI models...")
        }
        .task {
            await loadModels()
        }
    }

    /// Loads the models one after another, advancing `loadingProgress` after each success.
    func loadModels() async {
        do {
            // Load LLM (50%)
            try await RunAnywhere.loadModel("llama-3.2-1b-instruct-q4")
            loadingProgress = 0.5
            // Load STT (75%)
            try await RunAnywhere.loadSTTModel("whisper-base-onnx")
            loadingProgress = 0.75
            // Load TTS (100%)
            try await RunAnywhere.loadTTSVoice("piper-en-us-amy")
            loadingProgress = 1.0
            isReady = true
        } catch {
            // Do not report readiness for models that never loaded.
            loadError = "Failed to load AI models: \(error.localizedDescription)"
        }
    }
}
Memory Management
Unload When Not Needed
Free memory by unloading unused models:
/// Switches which RunAnywhere models are resident depending on the feature in use.
class ModelManager {
    enum ActiveFeature {
        case chat, voice, none
    }

    /// Loads or unloads models for `feature`.
    ///
    /// - Throws: Only load failures from the `.voice` case; unload failures are ignored.
    func switchTo(_ feature: ActiveFeature) async throws {
        switch feature {
        case .voice:
            // Voice needs both speech components loaded.
            try await RunAnywhere.loadSTTModel("whisper-base-onnx")
            try await RunAnywhere.loadTTSVoice("piper-en-us-amy")
        case .chat:
            // Chat does not use the speech components, so release them.
            await unloadVoiceComponents()
        case .none:
            // Nothing is active: release the LLM first, then the speech components.
            try? await RunAnywhere.unloadModel()
            await unloadVoiceComponents()
        }
    }

    /// Best-effort unload of the STT model followed by the TTS voice.
    private func unloadVoiceComponents() async {
        try? await RunAnywhere.unloadSTTModel()
        try? await RunAnywhere.unloadTTSVoice()
    }
}
Monitor Memory
/// Returns whether the device has enough installed RAM to attempt LLM operations.
///
/// `ProcessInfo.physicalMemory` reports the total physical memory of the device, not the
/// amount that is currently free, so this is a coarse device-class gate rather than a live
/// free-memory check. For a live figure on iOS, an API such as `os_proc_available_memory()`
/// would be needed instead.
///
/// - Returns: `true` when the device has more than 2,000,000,000 bytes of installed RAM.
func checkMemoryBeforeOperation() -> Bool {
    // Threshold on installed (not free) memory.
    let minimumInstalledBytes: UInt64 = 2_000_000_000
    let installedBytes = ProcessInfo.processInfo.physicalMemory
    return installedBytes > minimumInstalledBytes
}
Handle Memory Warnings
/// App delegate that sheds optional RunAnywhere resources when the system reports memory pressure.
class AppDelegate: NSObject, UIApplicationDelegate {
    func applicationDidReceiveMemoryWarning(_ application: UIApplication) {
        // The delegate callback is synchronous, so the async cleanup runs in its own task.
        Task {
            await AppDelegate.releaseNonEssentialResources()
        }
    }

    /// Best-effort cleanup: unloads the STT model and TTS voice, then removes temp files.
    /// Failures are ignored.
    private static func releaseNonEssentialResources() async {
        try? await RunAnywhere.unloadSTTModel()
        try? await RunAnywhere.unloadTTSVoice()
        try? await RunAnywhere.cleanTempFiles()
    }
}
Model Selection
Choose Appropriate Model Sizes
| Device | Recommended LLM | Notes |
| --- | --- | --- |
| iPhone 12/13 (4GB) | 1B Q4 | May need to unload others |
| iPhone 14/15 (6GB) | 1-3B Q4 | Good for most use cases |
| iPhone 15 Pro (8GB) | 3B Q4, some 7B | More headroom |
| iPad Pro | 3-7B Q4 | Depends on RAM |
| M1+ Mac | 7B+ Q4 | Ample memory |
Device-Specific Loading
/// Picks an LLM model identifier based on the device's installed physical memory.
///
/// - Returns: The 1B model id below 4,000,000,000 bytes, the 3B model id below
///   8,000,000,000 bytes, and the 7B model id otherwise.
func selectModelForDevice() -> String {
    let installedBytes = ProcessInfo.processInfo.physicalMemory

    // Smallest tier first; each check only runs if the previous one did not match.
    if installedBytes < 4_000_000_000 {
        return "llama-3.2-1b-instruct-q4"
    }
    if installedBytes < 8_000_000_000 {
        return "llama-3.2-3b-instruct-q4"
    }
    return "llama-3.2-7b-instruct-q4"
}
Streaming for Responsiveness
Always Stream for Long Outputs
// The two snippets below are alternatives: both bind `result`, so use one or the other.
// ❌ User waits for entire response
// The awaited call only hands back `result` once it has returned, so nothing can be shown before then.
let result = try await RunAnywhere. generate (prompt, options : LLMGenerationOptions ( maxTokens : 500 ))
// ✅ User sees tokens immediately
// `result.stream` is iterated with `for try await`, so each token is handed to the UI as it arrives.
let result = try await RunAnywhere. generateStream (prompt)
for try await token in result.stream {
updateUI ( with : token)
}
Batch UI Updates
/// Streams a generation and appends tokens to `displayText` in batches.
///
/// Tokens are accumulated in a local buffer and pushed to the main actor at most about
/// once every 50 ms, with a final flush for whatever remains when the stream ends.
///
/// - Throws: Errors from starting the stream or from iterating it.
func streamWithBatching() async throws {
    let result = try await RunAnywhere.generateStream(prompt)
    var buffer = ""
    var lastUpdate = Date()

    for try await token in result.stream {
        buffer += token
        // Update UI every 50ms instead of every token
        if Date().timeIntervalSince(lastUpdate) > 0.05 {
            // Hand the closure an immutable snapshot: a mutable local must not be
            // captured by the `@Sendable` closure passed to `MainActor.run`.
            let chunk = buffer
            buffer = ""
            await MainActor.run {
                self.displayText += chunk
            }
            lastUpdate = Date()
        }
    }

    // Flush remaining
    if !buffer.isEmpty {
        let chunk = buffer
        await MainActor.run {
            self.displayText += chunk
        }
    }
}
Threading
Use Appropriate Actors
// ✅ UI updates on MainActor
// The assignment to `self.response` runs inside the closure passed to `MainActor.run`.
await MainActor. run {
self . response = result. text
}
// ✅ Heavy operations stay off main thread
// The task handle is discarded here, so an error thrown by `loadModel` is not observed by this code.
Task. detached {
try await RunAnywhere. loadModel (modelId)
}
Don’t Block the Main Thread
// ❌ Blocking main thread
/// Anti-pattern example: a `@MainActor` function that runs the current run loop
/// until 5 seconds from now before returning. Shown only to illustrate what to avoid.
@MainActor
func loadModelSync () {
// This blocks UI!
RunLoop. current . run ( until : Date ( timeIntervalSinceNow : 5 ))
}
// ✅ Async loading with UI feedback
/// Starts loading the model without blocking the caller and keeps `isLoading` in sync.
///
/// `isLoading` is set before the task starts and is always cleared when the task
/// finishes, whether the load succeeded or threw.
@MainActor
func loadModelAsync() {
    isLoading = true
    Task {
        // This task is created from a @MainActor function and inherits that isolation,
        // so `isLoading` can be updated directly without an extra `MainActor.run` hop.
        defer { isLoading = false }
        do {
            try await RunAnywhere.loadModel(modelId)
        } catch {
            // Report the failure rather than letting the unobserved task discard it.
            print("Model load failed: \(error)")
        }
    }
}
Error Recovery
Implement Retries
/// Sends `prompt` to `RunAnywhere.chat`, retrying when the call times out.
///
/// - Parameters:
///   - prompt: Text passed to `RunAnywhere.chat`.
///   - maxAttempts: Maximum number of calls to make. Values below 1 are treated as 1.
/// - Returns: The first successful chat response.
/// - Throws: `SDKError.general(.timeout, …)` once every attempt has timed out. Any error
///   that is not an `SDKError` with code `.timeout` is rethrown immediately without retrying.
func generateWithRetry(prompt: String, maxAttempts: Int = 3) async throws -> String {
    // `1...maxAttempts` traps at runtime when maxAttempts < 1, so clamp to one attempt.
    let attempts = max(1, maxAttempts)

    for attempt in 1...attempts {
        do {
            return try await RunAnywhere.chat(prompt)
        } catch let error as SDKError where error.code == .timeout {
            // No point sleeping after the final attempt; fall through to the throw below.
            guard attempt < attempts else { break }
            try await Task.sleep(for: .seconds(1))
        }
    }
    throw SDKError.general(.timeout, "Max retries exceeded")
}
Graceful Fallbacks
/// Synthesizer used for the system-voice fallback.
///
/// It is kept alive outside `speak(_:)` because a synthesizer created as a local would be
/// released when the function returns, which can stop playback before the utterance is heard.
/// Declare it at the same scope as `speak(_:)` (a stored property when `speak` is a method).
private let fallbackSpeechSynthesizer = AVSpeechSynthesizer()

/// Speaks `text` with the RunAnywhere neural voice, falling back to the system voice on failure.
///
/// - Parameter text: The text to speak.
func speak(_ text: String) async {
    do {
        // Try neural voice
        try await RunAnywhere.speak(text)
    } catch {
        // Fall back to system voice
        let utterance = AVSpeechUtterance(string: text)
        fallbackSpeechSynthesizer.speak(utterance)
    }
}
Storage
Check Before Downloads
/// Downloads `model` only when storage can hold it plus a 500 MB safety margin.
///
/// A model with no reported `downloadSize` is treated as needing 0 bytes.
///
/// - Throws: `SDKError.general(.insufficientStorage, …)` when space is too low, or any
///   error from the download itself.
func downloadModelSafely(_ model: ModelInfo) async throws {
    let storageInfo = await RunAnywhere.getStorageInfo()
    let requiredBytes = model.downloadSize ?? 0

    // 500MB buffer on top of the model's own size
    if storageInfo.availableBytes <= requiredBytes + 500_000_000 {
        throw SDKError.general(.insufficientStorage, "Not enough storage")
    }

    try await Download.shared.downloadModel(model)
}
Clean Up Regularly
/// Best-effort housekeeping: clears temp files, then walks the model list looking for
/// downloaded models that `isModelNeeded` rejects. Failures in either step are ignored.
func performMaintenance() async {
    // Clean temp files
    try? await RunAnywhere.cleanTempFiles()

    // Remove unused downloaded models
    guard let catalog = try? await RunAnywhere.availableModels() else {
        return
    }
    for entry in catalog where entry.isDownloaded && !isModelNeeded(entry) {
        // Delete unused model
    }
}
Event Handling
Subscribe to Events
import Combine
/// View model that mirrors LLM generation events from the SDK into published state.
class AIViewModel: ObservableObject {
    @Published var isGenerating = false
    @Published var tokensPerSecond = 0.0
    private var cancellables = Set<AnyCancellable>()

    init() {
        // Subscribe to LLM events and deliver them on the main queue.
        let llmEvents = RunAnywhere.events.events(for: .llm)
        llmEvents
            .receive(on: DispatchQueue.main)
            .sink { [weak self] event in
                guard let self else { return }
                self.handleLLMEvent(event)
            }
            .store(in: &cancellables)
    }

    /// Updates `isGenerating` from the event's type; other event types are ignored.
    private func handleLLMEvent(_ event: SDKEvent) {
        switch event.type {
        case "generation.completed":
            isGenerating = false
        case "generation.started":
            isGenerating = true
        default:
            break
        }
    }
}
Testing
Use Development Environment
// Compiled only when the DEBUG condition is set: initialize against the development
// environment and set the SDK log level to debug.
# if DEBUG
try RunAnywhere. initialize ( environment : . development )
RunAnywhere. setLogLevel (. debug )
# endif
Mock for Unit Tests
/// Abstraction over text generation so a real or a mock implementation can be swapped in.
protocol AIService {
/// Produces a text response for `prompt`.
/// - Parameter prompt: The input text.
/// - Returns: The generated response.
/// - Throws: Whatever error the implementation raises.
func generate ( _ prompt : String ) async throws -> String
}
/// `AIService` implementation that forwards prompts to `RunAnywhere.chat`.
class RealAIService: AIService {
    /// Returns the SDK's chat reply for `prompt`, rethrowing any SDK error.
    func generate(_ prompt: String) async throws -> String {
        let reply = try await RunAnywhere.chat(prompt)
        return reply
    }
}
/// `AIService` test double that returns a fixed, prompt-derived string without calling the SDK.
class MockAIService: AIService {
    /// Returns `"Mock response for: "` followed by `prompt` and a single trailing space.
    func generate(_ prompt: String) async throws -> String {
        let prefix = "Mock response for: "
        return prefix + prompt + " "
    }
}
Permissions (Info.plist)
Your app’s Info.plist must include usage descriptions for all hardware your app accesses:
| Key | Required For | Example Value |
| --- | --- | --- |
| NSMicrophoneUsageDescription | STT, Voice Pipeline | "This app uses the microphone for on-device speech recognition." |
| NSSpeechRecognitionUsageDescription | STT | "This app uses on-device speech recognition." |
| NSCameraUsageDescription | VLM (Vision) | "This app uses the camera for on-device vision AI." |
| NSPhotoLibraryUsageDescription | VLM, Diffusion | "This app accesses your photo library for on-device image analysis." |
macOS App Sandbox
For macOS targets, disable the App Sandbox to allow model file downloads and storage:
Set ENABLE_APP_SANDBOX = NO in your Xcode build settings.
Backend Registration Order
Backends must be registered after RunAnywhere.initialize() and before any model registration or loading:
// ✅ Correct order
// Initialize first, then register the backends, then register the models.
try RunAnywhere. initialize ( environment : . development )
LlamaCPP. register ()
ONNX. register ()
ModelService. registerDefaultModels ()
// ❌ Wrong — backends registered before init
// Here `LlamaCPP.register()` runs before `RunAnywhere.initialize(...)` has been called.
LlamaCPP. register ()
try RunAnywhere. initialize ( environment : . development )
Sequential Model Downloads
Download models sequentially (not in parallel) to avoid race conditions in the SDK’s download service:
// ✅ Sequential downloads
// Each `try await` finishes (or throws) before the next call starts.
try await downloadAndLoad (llmModelId)
try await downloadAndLoad (sttModelId)
try await downloadAndLoad (ttsModelId)
// ❌ Parallel downloads may cause issues
// `async let` starts each call as a child task right away, so the two downloads overlap.
async let a = downloadAndLoad (llmModelId)
async let b = downloadAndLoad (sttModelId)
VLM Model Loading
VLM model loading requires a ModelDescriptor object, not just a model ID. Fetch it from availableModels():
// Thrown when the requested VLM model id is not present in `availableModels()`.
struct VLMModelNotRegistered: Error {}

let models = try await RunAnywhere.availableModels()
// `first(where:)` returns nil when no model has this id; throw instead of
// force-unwrapping so a missing or misspelled id surfaces as an error, not a crash.
guard let vlmModel = models.first(where: { $0.id == vlmModelId }) else {
    throw VLMModelNotRegistered()
}
try await RunAnywhere.loadVLMModel(model: vlmModel)
Try-Then-Download Pattern
Always attempt loadModel() first (succeeds if the model is already cached), and only download on failure:
do {
    // First try to load the model directly.
    try await RunAnywhere.loadModel(modelId: modelId)
} catch {
    // The load failed: download the model, wait for the completed stage, then load again.
    let downloadUpdates = try await RunAnywhere.downloadModel(modelId: modelId)
    for await update in downloadUpdates where update.stage == .completed {
        break
    }
    try await RunAnywhere.loadModel(modelId: modelId)
}
STT transcription requires audio recorded as 16kHz, mono, 16-bit linear PCM (WAV). The minimum data threshold is ~1600 bytes (~0.1 seconds at 16kHz).
When building for both iOS and macOS, use conditional compilation for platform-specific APIs:
// Build the VLM input image with the initializer that matches the target platform:
// from `uiImage` on iOS, from raw RGB pixel data plus dimensions on macOS.
# if os ( iOS )
let image = VLMImage ( image : uiImage)
# elseif os ( macOS )
let image = VLMImage ( rgbPixels : pixelData, width : width, height : height)
# endif
AVAudioSession is only available on iOS — skip audio session setup on macOS.
Quick Reference
| Scenario | Recommendation |
| --- | --- |
| App launch | Initialize SDK, register modules |
| Before first use | Pre-load models |
| Long text generation | Use streaming |
| Low memory devices | Use smaller models, unload when done |
| Voice features | Use Voice Agent for full pipeline |
| Production | Set log level to .warning |
| Errors | Implement retries and fallbacks |
| Background | Unload models to free memory |
Configuration SDK configuration →
Error Handling Handle errors gracefully →