Complete Example
Here’s a complete example to get you started with on-device text generation:
import RunAnywhere
import LlamaCPPRuntime
import ONNXRuntime
/// App entry point: bootstraps the RunAnywhere SDK once at launch.
///
/// Setup runs inside a `Task { @MainActor in ... }` so the synchronous
/// `App.init` returns immediately while the async SDK work proceeds on the
/// main actor. The three steps below are order-sensitive: the SDK must be
/// initialized before backends are registered, and backends must be
/// registered before models.
@main
struct MyApp: App {
init() {
Task { @MainActor in
do {
// 1. Initialize the SDK first (development mode: no API key required)
try RunAnywhere.initialize(environment: .development)
// 2. Register backend modules (must follow initialize, precede models)
LlamaCPP.register()
ONNX.register()
// 3. Register models (ModelService is app-local — presumably wraps the
// RunAnywhere.registerModel calls shown later; confirm in your project)
ModelService.registerDefaultModels()
print("SDK v\(RunAnywhere.version) initialized")
} catch {
// Initialization failures are logged; the app still launches so the UI
// can surface the error state.
print("SDK initialization failed: \(error)")
}
}
}
var body: some Scene {
WindowGroup {
ContentView()
}
}
}
Step 1: Initialize the SDK
Initialize the SDK once at app launch. In development mode, no API key or base URL is required:
try RunAnywhere.initialize(environment: .development)
For production, supply your API key and base URL:
// Production mode: full authentication against the hosted service.
// Replace <YOUR_API_KEY> with the key from your RunAnywhere dashboard.
try RunAnywhere.initialize(
apiKey: "<YOUR_API_KEY>",
baseURL: "https://api.runanywhere.ai",
environment: .production
)
Environment Options

| Environment | Log Level | Description |
|---|---|---|
| `.development` | Debug | Verbose logging, no auth required |
| `.staging` | Info | Testing with real services |
| `.production` | Warning | Minimal logging, full authentication, telemetry |
Step 2: Register Backend Modules
Register modules after SDK initialization but before registering models:
import RunAnywhere
import LlamaCPPRuntime
import ONNXRuntime
@MainActor
func setupSDK() {
    // Wire up the inference backends. Must run after RunAnywhere.initialize()
    // and before any model is registered or loaded.
    LlamaCPP.register()  // GGUF LLM/VLM inference via llama.cpp with Metal GPU
    ONNX.register()      // Speech stack — STT, TTS, and VAD via Sherpa-ONNX
}
Backend registration order matters. You must register backends before registering or loading models. LlamaCPP.register() and ONNX.register() must be called after RunAnywhere.initialize().

Step 3: Register Models
Register models before downloading or loading. Each model needs an ID, name, URL, framework, and memory requirement:
// LLM model (single GGUF file, downloaded directly from Hugging Face).
// memoryRequirement appears to be in bytes (~300 MB here) — presumably the
// RAM needed to load the model; confirm against the SDK reference.
RunAnywhere.registerModel(
id: "lfm2-350m-q4_k_m",
name: "LFM2 350M Q4_K_M",
url: URL(string: "https://huggingface.co/LiquidAI/LFM2-350M-GGUF/resolve/main/LFM2-350M-Q4_K_M.gguf")!,
framework: .llamaCpp,
memoryRequirement: 300_000_000
)
// STT model (tar.gz archive). artifactType tells the SDK to unpack the
// archive; .nestedDirectory indicates the model files live in a subfolder.
RunAnywhere.registerModel(
id: "sherpa-onnx-whisper-tiny.en",
name: "Sherpa Whisper Tiny",
url: URL(string: "https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-tiny.en.tar.gz")!,
framework: .onnx,
modality: .speechRecognition,
artifactType: .archive(.tarGz, structure: .nestedDirectory),
memoryRequirement: 75_000_000
)
// TTS voice model (tar.gz archive), same archive layout as the STT model.
RunAnywhere.registerModel(
id: "vits-piper-en_US-lessac-medium",
name: "Piper TTS English",
url: URL(string: "https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_US-lessac-medium.tar.gz")!,
framework: .onnx,
modality: .speechSynthesis,
artifactType: .archive(.tarGz, structure: .nestedDirectory),
memoryRequirement: 65_000_000
)
Multi-file models (such as vision-language models that ship a separate projector file) are registered with registerMultiFileModel:
// Vision-language model split across two GGUF files: the main weights plus a
// multimodal projector (mmproj). Both descriptors are downloaded together.
// NOTE(review): the "..." in the URLs is a docs placeholder — substitute the
// real Hugging Face paths before use.
RunAnywhere.registerMultiFileModel(
id: "smolvlm-256m-instruct",
name: "SmolVLM 256M Instruct",
files: [
ModelFileDescriptor(url: URL(string: "https://huggingface.co/.../SmolVLM-Q8_0.gguf")!, filename: "SmolVLM-Q8_0.gguf"),
ModelFileDescriptor(url: URL(string: "https://huggingface.co/.../mmproj-f16.gguf")!, filename: "mmproj-f16.gguf"),
],
framework: .llamaCpp,
modality: .multimodal,
memoryRequirement: 365_000_000
)
Step 4: Download and Load Models
Download models with progress tracking, then load into memory:
// Download with progress. downloadModel returns an async stream of progress
// events; iterate until the .completed stage is observed.
let progressStream = try await RunAnywhere.downloadModel(modelId: "lfm2-350m-q4_k_m")
for await progress in progressStream {
print("Download: \(Int(progress.overallProgress * 100))%")
if progress.stage == .completed { break }
}
// Load the model into memory (throws if the model is not downloaded).
try await RunAnywhere.loadModel(modelId: "lfm2-350m-q4_k_m")
// Check if loaded — note this is an async property, not a method call.
let isLoaded = await RunAnywhere.isModelLoaded
print("Model loaded: \(isLoaded)")
The SDK caches downloaded models. On subsequent launches, loadModel() succeeds immediately without re-downloading. Use a try-then-download pattern: attempt loadModel() first, and only call downloadModel() if it fails.

Step 5: Generate Text

Simple Chat
// One-shot chat convenience API: sends the prompt to the currently loaded
// model and returns the full response text.
let response = try await RunAnywhere.chat("What is the capital of France?")
print(response) // "The capital of France is Paris."
Full Generation with Metrics
// Full generation API: returns the text plus performance metrics.
// maxTokens caps the response length; temperature (0-1) controls randomness.
let result = try await RunAnywhere.generate(
"Explain quantum computing in simple terms",
options: LLMGenerationOptions(
maxTokens: 200,
temperature: 0.7
)
)
print("Response: \(result.text)")
print("Tokens used: \(result.tokensUsed)")
print("Speed: \(result.tokensPerSecond) tok/s")
print("Latency: \(result.latencyMs)ms")
Complete SwiftUI Example
Here’s a full SwiftUI view demonstrating text generation:
import SwiftUI
import RunAnywhere
/// Demo chat screen: loads the model on appearance, then generates a response
/// for the user's prompt on demand.
struct ContentView: View {
// UI state; all mutated from async funcs launched via Task/.task, which run
// on the main actor for a SwiftUI View.
@State private var prompt = ""
@State private var response = ""
@State private var isLoading = false
@State private var isModelLoaded = false
var body: some View {
VStack(spacing: 20) {
// Model status indicator (green dot = loaded)
HStack {
Circle()
.fill(isModelLoaded ? .green : .gray)
.frame(width: 10, height: 10)
Text(isModelLoaded ? "Model Ready" : "Model Not Loaded")
.font(.caption)
}
// Prompt input
TextField("Enter your prompt...", text: $prompt)
.textFieldStyle(.roundedBorder)
.padding(.horizontal)
// Generate button; disabled while loading, when the prompt is empty,
// or before the model finishes loading
Button(action: { Task { await generate() } }) {
HStack {
if isLoading {
ProgressView()
.scaleEffect(0.8)
}
Text(isLoading ? "Generating..." : "Generate")
}
.frame(maxWidth: .infinity)
.padding()
.background(Color.blue)
.foregroundColor(.white)
.cornerRadius(10)
}
.disabled(isLoading || prompt.isEmpty || !isModelLoaded)
.padding(.horizontal)
// Scrollable response area
ScrollView {
Text(response)
.padding()
.frame(maxWidth: .infinity, alignment: .leading)
}
.background(Color.gray.opacity(0.1))
.cornerRadius(10)
.padding(.horizontal)
}
.padding()
.task {
await loadModel()
}
}
/// Try-then-download pattern: loadModel succeeds immediately if the model is
/// already cached; otherwise download it first, then load.
func loadModel() async {
do {
// Try loading (succeeds if already downloaded)
try await RunAnywhere.loadModel(modelId: "lfm2-350m-q4_k_m")
isModelLoaded = true
} catch {
// Download first, then load
do {
let progress = try await RunAnywhere.downloadModel(modelId: "lfm2-350m-q4_k_m")
for await p in progress {
if p.stage == .completed { break }
}
try await RunAnywhere.loadModel(modelId: "lfm2-350m-q4_k_m")
isModelLoaded = true
} catch {
// Surface the failure in the response area rather than crashing
response = "Failed to load model: \(error.localizedDescription)"
}
}
}
/// Runs one generation for the current prompt and shows the result.
/// `defer` guarantees the loading flag resets on every exit path.
func generate() async {
isLoading = true
defer { isLoading = false }
do {
let result = try await RunAnywhere.generate(
prompt,
options: LLMGenerationOptions(
maxTokens: 200,
temperature: 0.7
)
)
response = result.text
} catch {
response = "Error: \(error.localizedDescription)"
}
}
}
Streaming Example
For a more responsive UI, use streaming generation:
/// Streams generated tokens into `response` as they arrive, for a more
/// responsive UI than waiting on the full result.
///
/// Fix: `isLoading` is now reset via `defer`, matching the sibling
/// `generate()` function — it previously relied on falling through to the
/// last statement, which silently breaks if an early return is ever added.
func generateStreaming() async {
    isLoading = true
    response = ""
    defer { isLoading = false }
    do {
        let result = try await RunAnywhere.generateStream(
            prompt,
            options: LLMGenerationOptions(maxTokens: 500)
        )
        // Display tokens as they arrive; hop to the main actor before
        // mutating view state.
        for try await token in result.stream {
            await MainActor.run {
                response += token
            }
        }
        // Await the aggregate metrics produced once the stream finishes.
        let metrics = try await result.result.value
        print("Generated \(metrics.tokensUsed) tokens at \(metrics.tokensPerSecond) tok/s")
    } catch {
        response = "Error: \(error.localizedDescription)"
    }
}