Skip to main content

Complete Example

Here’s a complete example to get you started with on-device text generation:
import RunAnywhere
import LlamaCPPRuntime
import ONNXRuntime

@main
struct MyApp: App {
    init() {
        // Kick off one-time SDK setup on the main actor at launch.
        Task { @MainActor in
            Self.bootstrapSDK()
        }
    }

    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }

    /// One-time SDK bootstrap: initialize the SDK, then register backends,
    /// then register models — in that order.
    @MainActor
    private static func bootstrapSDK() {
        do {
            // 1. Initialize the SDK first
            try RunAnywhere.initialize(environment: .development)

            // 2. Register backend modules
            LlamaCPP.register()
            ONNX.register()

            // 3. Register models
            ModelService.registerDefaultModels()

            print("SDK v\(RunAnywhere.version) initialized")
        } catch {
            print("SDK initialization failed: \(error)")
        }
    }
}

Step 1: Initialize the SDK

Initialize the SDK once at app launch. In development mode, no API key or base URL is required:
// Development mode: no API key or base URL required.
try RunAnywhere.initialize(environment: .development)
For production, provide authentication:
// Production mode: supply credentials and the API base URL.
try RunAnywhere.initialize(
    apiKey: "<YOUR_API_KEY>",
    baseURL: "https://api.runanywhere.ai",
    environment: .production
)

Environment Options

| Environment    | Log Level | Description                                      |
| -------------- | --------- | ------------------------------------------------ |
| `.development` | Debug     | Verbose logging, no auth required                |
| `.staging`     | Info      | Testing with real services                       |
| `.production`  | Warning   | Minimal logging, full authentication, telemetry  |

Step 2: Register Backend Modules

Register modules after SDK initialization but before registering models:
import RunAnywhere
import LlamaCPPRuntime
import ONNXRuntime

/// Registers the inference backends with the SDK.
///
/// Call after `RunAnywhere.initialize(...)` and before registering or
/// loading any models — backend registration order is load-bearing.
@MainActor
func setupSDK() {
    LlamaCPP.register()   // LLM + VLM (GGUF models via llama.cpp with Metal GPU)
    ONNX.register()       // STT + TTS + VAD (via Sherpa-ONNX)
}
Registration order matters: call LlamaCPP.register() and ONNX.register() after RunAnywhere.initialize(), and before registering or loading any models.

Step 3: Register Models

Register models before downloading or loading. Each model needs an ID, name, URL, framework, and memory requirement:
// LLM model (single GGUF file)
// memoryRequirement is presumably bytes (300_000_000 ≈ 300 MB) — confirm against SDK docs.
RunAnywhere.registerModel(
    id: "lfm2-350m-q4_k_m",
    name: "LFM2 350M Q4_K_M",
    url: URL(string: "https://huggingface.co/LiquidAI/LFM2-350M-GGUF/resolve/main/LFM2-350M-Q4_K_M.gguf")!,
    framework: .llamaCpp,
    memoryRequirement: 300_000_000
)

// STT model (tar.gz archive)
// Archived models declare an artifactType so the SDK knows how to unpack them.
RunAnywhere.registerModel(
    id: "sherpa-onnx-whisper-tiny.en",
    name: "Sherpa Whisper Tiny",
    url: URL(string: "https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-tiny.en.tar.gz")!,
    framework: .onnx,
    modality: .speechRecognition,  // speech-to-text
    artifactType: .archive(.tarGz, structure: .nestedDirectory),
    memoryRequirement: 75_000_000
)

// TTS voice model (tar.gz archive)
RunAnywhere.registerModel(
    id: "vits-piper-en_US-lessac-medium",
    name: "Piper TTS English",
    url: URL(string: "https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_US-lessac-medium.tar.gz")!,
    framework: .onnx,
    modality: .speechSynthesis,    // text-to-speech
    artifactType: .archive(.tarGz, structure: .nestedDirectory),
    memoryRequirement: 65_000_000
)
For multi-file models (e.g., VLM with a projector):
// Multi-file registration: each file is described by a URL plus the local
// filename it should be stored under (here: VLM weights + vision projector).
RunAnywhere.registerMultiFileModel(
    id: "smolvlm-256m-instruct",
    name: "SmolVLM 256M Instruct",
    files: [
        ModelFileDescriptor(url: URL(string: "https://huggingface.co/.../SmolVLM-Q8_0.gguf")!, filename: "SmolVLM-Q8_0.gguf"),
        ModelFileDescriptor(url: URL(string: "https://huggingface.co/.../mmproj-f16.gguf")!, filename: "mmproj-f16.gguf"),
    ],
    framework: .llamaCpp,
    modality: .multimodal,
    memoryRequirement: 365_000_000
)

Step 4: Download and Load Models

Download models with progress tracking, then load into memory:
// Download with progress
// downloadModel returns an async stream of progress events rather than blocking.
let progressStream = try await RunAnywhere.downloadModel(modelId: "lfm2-350m-q4_k_m")
for await progress in progressStream {
    print("Download: \(Int(progress.overallProgress * 100))%")
    if progress.stage == .completed { break }  // stop consuming once the download finishes
}

// Load the model
// Brings the downloaded weights into memory so generation calls can run.
try await RunAnywhere.loadModel(modelId: "lfm2-350m-q4_k_m")

// Check if loaded
let isLoaded = await RunAnywhere.isModelLoaded
print("Model loaded: \(isLoaded)")
The SDK caches downloaded models. On subsequent launches, loadModel() succeeds immediately without re-downloading. Use a try-then-download pattern: attempt loadModel() first, and only call downloadModel() if it fails.

Step 5: Generate Text

Simple Chat

// Minimal one-call API: send a prompt, get the reply text back.
let response = try await RunAnywhere.chat("What is the capital of France?")
print(response)  // "The capital of France is Paris."

Full Generation with Metrics

// generate(_:options:) returns the text together with performance metrics.
let result = try await RunAnywhere.generate(
    "Explain quantum computing in simple terms",
    options: LLMGenerationOptions(
        maxTokens: 200,      // cap on generated tokens
        temperature: 0.7     // sampling temperature
    )
)

print("Response: \(result.text)")
print("Tokens used: \(result.tokensUsed)")
print("Speed: \(result.tokensPerSecond) tok/s")
print("Latency: \(result.latencyMs)ms")

Complete SwiftUI Example

Here’s a full SwiftUI view demonstrating text generation:
import SwiftUI
import RunAnywhere

struct ContentView: View {
    // MARK: - View state

    @State private var prompt = ""
    @State private var response = ""
    @State private var isLoading = false
    @State private var isModelLoaded = false

    // MARK: - Body

    var body: some View {
        VStack(spacing: 20) {
            statusRow
            promptField
            generateButton
            responseArea
        }
        .padding()
        .task {
            await loadModel()
        }
    }

    // MARK: - Subviews

    /// Colored dot plus caption reflecting whether the model is in memory.
    private var statusRow: some View {
        HStack {
            Circle()
                .fill(isModelLoaded ? .green : .gray)
                .frame(width: 10, height: 10)
            Text(isModelLoaded ? "Model Ready" : "Model Not Loaded")
                .font(.caption)
        }
    }

    /// Prompt entry field.
    private var promptField: some View {
        TextField("Enter your prompt...", text: $prompt)
            .textFieldStyle(.roundedBorder)
            .padding(.horizontal)
    }

    /// Starts generation; disabled while busy, when the prompt is empty,
    /// or while no model is loaded.
    private var generateButton: some View {
        Button(action: { Task { await generate() } }) {
            HStack {
                if isLoading {
                    ProgressView()
                        .scaleEffect(0.8)
                }
                Text(isLoading ? "Generating..." : "Generate")
            }
            .frame(maxWidth: .infinity)
            .padding()
            .background(Color.blue)
            .foregroundColor(.white)
            .cornerRadius(10)
        }
        .disabled(isLoading || prompt.isEmpty || !isModelLoaded)
        .padding(.horizontal)
    }

    /// Scrollable area showing the latest response (or error message).
    private var responseArea: some View {
        ScrollView {
            Text(response)
                .padding()
                .frame(maxWidth: .infinity, alignment: .leading)
        }
        .background(Color.gray.opacity(0.1))
        .cornerRadius(10)
        .padding(.horizontal)
    }

    // MARK: - SDK interaction

    /// Loads the model, downloading it first when it is not cached locally.
    func loadModel() async {
        // Fast path: succeeds immediately when the model is already downloaded.
        if (try? await RunAnywhere.loadModel(modelId: "lfm2-350m-q4_k_m")) != nil {
            isModelLoaded = true
            return
        }
        // Slow path: download, then load.
        do {
            let events = try await RunAnywhere.downloadModel(modelId: "lfm2-350m-q4_k_m")
            for await event in events where event.stage == .completed {
                break
            }
            try await RunAnywhere.loadModel(modelId: "lfm2-350m-q4_k_m")
            isModelLoaded = true
        } catch {
            response = "Failed to load model: \(error.localizedDescription)"
        }
    }

    /// Runs one generation pass and publishes the result (or error) to `response`.
    func generate() async {
        isLoading = true
        defer { isLoading = false }

        let options = LLMGenerationOptions(
            maxTokens: 200,
            temperature: 0.7
        )
        do {
            let result = try await RunAnywhere.generate(prompt, options: options)
            response = result.text
        } catch {
            response = "Error: \(error.localizedDescription)"
        }
    }
}

Streaming Example

For a more responsive UI, use streaming generation:
/// Streams tokens into `response` as they arrive, then logs final metrics.
///
/// Mirrors `generate()`, but updates the UI incrementally for responsiveness.
func generateStreaming() async {
    isLoading = true
    response = ""
    // Consistent with `generate()`: use `defer` so the spinner is cleared on
    // every exit path, including an error thrown mid-stream.
    defer { isLoading = false }

    do {
        let result = try await RunAnywhere.generateStream(
            prompt,
            options: LLMGenerationOptions(maxTokens: 500)
        )

        // Display tokens as they arrive (state mutation hops to the main actor).
        for try await token in result.stream {
            await MainActor.run {
                response += token
            }
        }

        // Get final metrics once the stream has finished.
        let metrics = try await result.result.value
        print("Generated \(metrics.tokensUsed) tokens at \(metrics.tokensPerSecond) tok/s")

    } catch {
        response = "Error: \(error.localizedDescription)"
    }
}

What’s Next?