Follow these best practices to build high-performance, reliable apps with the RunAnywhere SDK.

Initialization

Initialize Early

Initialize the SDK at app launch, not when first needed:
@main
struct MyApp: App {
    init() {
        Task { @MainActor in
            // Register modules
            LlamaCPP.register()
            ONNX.register()

            // Initialize SDK
            try? RunAnywhere.initialize(
                apiKey: Config.apiKey,
                baseURL: Config.baseURL,
                environment: .production
            )
        }
    }

    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}

Pre-load Models

Load models during onboarding or splash screen:
struct SplashView: View {
    @State private var loadingProgress = 0.0
    @State private var isReady = false

    var body: some View {
        VStack {
            ProgressView(value: loadingProgress)
            Text("Loading AI models...")
        }
        .task {
            await loadModels()
        }
    }

    func loadModels() async {
        // Load LLM (50%)
        try? await RunAnywhere.loadModel("llama-3.2-1b-instruct-q4")
        loadingProgress = 0.5

        // Load STT (75%)
        try? await RunAnywhere.loadSTTModel("whisper-base-onnx")
        loadingProgress = 0.75

        // Load TTS (100%)
        try? await RunAnywhere.loadTTSVoice("piper-en-us-amy")
        loadingProgress = 1.0

        isReady = true
    }
}

Memory Management

Unload When Not Needed

Free memory by unloading unused models:
class ModelManager {
    enum ActiveFeature {
        case chat, voice, none
    }

    func switchTo(_ feature: ActiveFeature) async throws {
        switch feature {
        case .chat:
            // Only need LLM
            try? await RunAnywhere.unloadSTTModel()
            try? await RunAnywhere.unloadTTSVoice()

        case .voice:
            // Need all voice components
            try await RunAnywhere.loadSTTModel("whisper-base-onnx")
            try await RunAnywhere.loadTTSVoice("piper-en-us-amy")

        case .none:
            // Free all memory
            try? await RunAnywhere.unloadModel()
            try? await RunAnywhere.unloadSTTModel()
            try? await RunAnywhere.unloadTTSVoice()
        }
    }
}

Monitor Memory

Check available headroom before heavy operations:
import os

func checkMemoryBeforeOperation() -> Bool {
    // Note: ProcessInfo.physicalMemory reports total device RAM, not free
    // memory. os_proc_available_memory() (iOS 13+) reports how much the
    // app can still allocate before hitting its limit.
    let available = os_proc_available_memory()

    // Require at least 2GB of headroom for LLM operations
    return available > 2_000_000_000
}
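
A call site can gate heavy work on this check; a minimal sketch (loadModelIfMemoryAllows is illustrative, not part of the SDK):
func loadModelIfMemoryAllows(_ modelId: String) async throws {
    guard checkMemoryBeforeOperation() else {
        // Not enough headroom: fall back to a smaller model or defer the load
        return
    }
    try await RunAnywhere.loadModel(modelId)
}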

Handle Memory Warnings

Respond to memory pressure by dropping non-essential models:
import UIKit

class AppDelegate: NSObject, UIApplicationDelegate {
    func applicationDidReceiveMemoryWarning(_ application: UIApplication) {
        Task {
            // Unload non-essential models and clear temporary files
            try? await RunAnywhere.unloadSTTModel()
            try? await RunAnywhere.unloadTTSVoice()
            try? await RunAnywhere.cleanTempFiles()
        }
    }
}
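
SwiftUI apps without an app delegate can observe the same signal through NotificationCenter; a sketch mirroring the unload calls above:
import UIKit

let memoryWarningObserver = NotificationCenter.default.addObserver(
    forName: UIApplication.didReceiveMemoryWarningNotification,
    object: nil,
    queue: .main
) { _ in
    Task {
        // Same recovery steps as the delegate version
        try? await RunAnywhere.unloadSTTModel()
        try? await RunAnywhere.unloadTTSVoice()
        try? await RunAnywhere.cleanTempFiles()
    }
}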

Model Selection

Choose Appropriate Model Sizes

Device | Recommended LLM | Notes
iPhone 12/13 (4GB) | 1B Q4 | May need to unload others
iPhone 14/15 (6GB) | 1-3B Q4 | Good for most use cases
iPhone 15 Pro (8GB) | 3B Q4, some 7B | More headroom
iPad Pro | 3-7B Q4 | Depends on RAM
M1+ Mac | 7B+ Q4 | Ample memory

Device-Specific Loading

func selectModelForDevice() -> String {
    let memory = ProcessInfo.processInfo.physicalMemory

    switch memory {
    case ..<4_000_000_000:
        return "llama-3.2-1b-instruct-q4"  // 1B for 4GB devices
    case 4_000_000_000..<8_000_000_000:
        return "llama-3.2-3b-instruct-q4"  // 3B for 4-8GB
    default:
        return "llama-3.2-7b-instruct-q4"  // 7B for 8GB+
    }
}
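
The result can feed straight into model loading:
try await RunAnywhere.loadModel(selectModelForDevice())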

Streaming for Responsiveness

Always Stream for Long Outputs

// ❌ User waits for entire response
let result = try await RunAnywhere.generate(prompt, options: LLMGenerationOptions(maxTokens: 500))

// ✅ User sees tokens immediately
let result = try await RunAnywhere.generateStream(prompt)
for try await token in result.stream {
    updateUI(with: token)
}

Batch UI Updates

func streamWithBatching() async throws {
    let result = try await RunAnywhere.generateStream(prompt)

    var buffer = ""
    var lastUpdate = Date()

    for try await token in result.stream {
        buffer += token

        // Update UI every 50ms instead of every token
        if Date().timeIntervalSince(lastUpdate) > 0.05 {
            await MainActor.run {
                self.displayText += buffer
            }
            buffer = ""
            lastUpdate = Date()
        }
    }

    // Flush remaining
    if !buffer.isEmpty {
        await MainActor.run {
            self.displayText += buffer
        }
    }
}

Threading

Use Appropriate Actors

// ✅ UI updates on MainActor
await MainActor.run {
    self.response = result.text
}

// ✅ Heavy operations stay off main thread
Task.detached {
    try await RunAnywhere.loadModel(modelId)
}

Don’t Block the Main Thread

// ❌ Blocking main thread
@MainActor
func loadModelSync() {
    // This blocks UI!
    RunLoop.current.run(until: Date(timeIntervalSinceNow: 5))
}

// ✅ Async loading with UI feedback
@MainActor
func loadModelAsync() {
    isLoading = true

    Task {
        try await RunAnywhere.loadModel(modelId)
        await MainActor.run {
            isLoading = false
        }
    }
}

Error Recovery

Implement Retries

func generateWithRetry(prompt: String, maxAttempts: Int = 3) async throws -> String {
    for attempt in 1...maxAttempts {
        do {
            return try await RunAnywhere.chat(prompt)
        } catch let error as SDKError where error.code == .timeout {
            if attempt < maxAttempts {
                try await Task.sleep(for: .seconds(1))
                continue
            }
        }
    }
    throw SDKError.general(.timeout, "Max retries exceeded")
}
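
The sketch retries with a fixed 1-second delay; for transient failures, exponential backoff (doubling the delay on each attempt) is a common refinement.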

Graceful Fallbacks

import AVFoundation

// Keep the synthesizer alive beyond the call: a locally scoped
// AVSpeechSynthesizer can be deallocated before it finishes speaking.
let fallbackSynthesizer = AVSpeechSynthesizer()

func speak(_ text: String) async {
    do {
        // Try the neural voice first
        try await RunAnywhere.speak(text)
    } catch {
        // Fall back to the system voice
        fallbackSynthesizer.speak(AVSpeechUtterance(string: text))
    }
}

Storage

Check Before Downloads

func downloadModelSafely(_ model: ModelInfo) async throws {
    let storage = await RunAnywhere.getStorageInfo()
    let required = model.downloadSize ?? 0

    guard storage.availableBytes > required + 500_000_000 else {  // 500MB buffer
        throw SDKError.general(.insufficientStorage, "Not enough storage")
    }

    try await Download.shared.downloadModel(model)
}

Clean Up Regularly

func performMaintenance() async {
    // Clean temp files
    try? await RunAnywhere.cleanTempFiles()

    // Remove unused downloaded models
    let models = try? await RunAnywhere.availableModels()
    for model in models ?? [] {
        if model.isDownloaded && !isModelNeeded(model) {
            // Delete unused model
        }
    }
}
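
One convenient trigger is backgrounding; a sketch using SwiftUI's scenePhase (RootView is illustrative):
import SwiftUI

struct RootView: View {
    @Environment(\.scenePhase) private var scenePhase

    var body: some View {
        ContentView()
            .onChange(of: scenePhase) { phase in
                // Run maintenance when the app moves to the background
                if phase == .background {
                    Task { await performMaintenance() }
                }
            }
    }
}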

Event Handling

Subscribe to Events

import Combine

class AIViewModel: ObservableObject {
    @Published var isGenerating = false
    @Published var tokensPerSecond = 0.0

    private var cancellables = Set<AnyCancellable>()

    init() {
        RunAnywhere.events.events(for: .llm)
            .receive(on: DispatchQueue.main)
            .sink { [weak self] event in
                self?.handleLLMEvent(event)
            }
            .store(in: &cancellables)
    }

    private func handleLLMEvent(_ event: SDKEvent) {
        switch event.type {
        case "generation.started":
            isGenerating = true
        case "generation.completed":
            isGenerating = false
        default:
            break
        }
    }
}

Testing

Use Development Environment

#if DEBUG
try RunAnywhere.initialize(environment: .development)
RunAnywhere.setLogLevel(.debug)
#endif

Mock for Unit Tests

protocol AIService {
    func generate(_ prompt: String) async throws -> String
}

class RealAIService: AIService {
    func generate(_ prompt: String) async throws -> String {
        try await RunAnywhere.chat(prompt)
    }
}

class MockAIService: AIService {
    func generate(_ prompt: String) async throws -> String {
        return "Mock response for: \(prompt)"
    }
}
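
Unit tests can then run against the mock without touching the SDK; a minimal XCTest sketch:
import XCTest

final class AIServiceTests: XCTestCase {
    func testMockServiceEchoesPrompt() async throws {
        let service: AIService = MockAIService()
        let response = try await service.generate("Hello")
        XCTAssertEqual(response, "Mock response for: Hello")
    }
}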

Quick Reference

Scenario | Recommendation
App launch | Initialize SDK, register modules
Before first use | Pre-load models
Long text generation | Use streaming
Low-memory devices | Use smaller models, unload when done
Voice features | Use Voice Agent for full pipeline
Production | Set log level to .warning
Errors | Implement retries and fallbacks
Background | Unload models to free memory
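
For the Production row, a conditional setup keeps verbose logging out of release builds (reusing setLogLevel from the Testing section):
#if DEBUG
RunAnywhere.setLogLevel(.debug)
#else
RunAnywhere.setLogLevel(.warning)
#endif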