> Documentation Index
> Fetch the complete documentation index at: https://docs.runanywhere.ai/llms.txt
> Use this file to discover all available pages before exploring further.
Complete Example
Here’s a complete example to get you started with on-device text generation:
import SwiftUI
import RunAnywhere
import LlamaCPPRuntime
import ONNXRuntime
/// Application entry point.
///
/// On launch it initializes the RunAnywhere SDK in development mode, registers
/// the LlamaCPP and ONNX backends, registers the default models, and then
/// presents `ContentView` in the main window.
@main
struct MyApp: App {
    init() {
        // The bootstrap work is scheduled onto the main actor from the initializer.
        Task { @MainActor in
            Self.bootstrapSDK()
        }
    }

    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }

    /// Runs the three setup steps in their required order and logs the outcome.
    /// A failure is reported with `print` and not propagated.
    @MainActor
    private static func bootstrapSDK() {
        do {
            // 1. Initialize the SDK first
            try RunAnywhere.initialize(environment: .development)

            // 2. Register backend modules
            LlamaCPP.register()
            ONNX.register()

            // 3. Register models
            ModelService.registerDefaultModels()

            print("SDK v \(RunAnywhere.version) initialized")
        } catch {
            print("SDK initialization failed: \(error) ")
        }
    }
}
Step 1: Initialize the SDK
Initialize the SDK once at app launch. In development mode, no API key or base URL is required:
// Development: only the environment is passed (no API key, no base URL).
try RunAnywhere.initialize(environment: .development)
For production, provide authentication:
// Production: supply the API key and base URL along with the environment.
try RunAnywhere.initialize(
    apiKey: "<YOUR_API_KEY>",
    baseURL: "https://api.runanywhere.ai",
    environment: .production
)
Environment Options
| Environment | Log Level | Description |
| --- | --- | --- |
| `.development` | Debug | Verbose logging, no auth required |
| `.staging` | Info | Testing with real services |
| `.production` | Warning | Minimal logging, full authentication, telemetry |
Step 2: Register Backend Modules
Register modules after SDK initialization but before registering models:
import RunAnywhere
import LlamaCPPRuntime
import ONNXRuntime
/// Registers the inference backends used in this guide.
///
/// Call this after `RunAnywhere.initialize(...)` and before registering or
/// loading any model.
@MainActor
func setupSDK() {
    // LLM + VLM (GGUF models via llama.cpp with Metal GPU)
    LlamaCPP.register()

    // STT + TTS + VAD (via Sherpa-ONNX)
    ONNX.register()
}
Backend registration order matters. You must register backends before registering or loading
models. LlamaCPP.register() and ONNX.register() must be called after
RunAnywhere.initialize().
Step 3: Register Models
Register models before downloading or loading. Each model needs an ID, name, URL, framework, and memory requirement:
// Download locations for the three models registered below.
let lfm2URL = URL(string: "https://huggingface.co/LiquidAI/LFM2-350M-GGUF/resolve/main/LFM2-350M-Q4_K_M.gguf")!
let whisperTinyURL = URL(string: "https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-tiny.en.tar.gz")!
let piperLessacURL = URL(string: "https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_US-lessac-medium.tar.gz")!

// LLM model (single GGUF file)
RunAnywhere.registerModel(
    id: "lfm2-350m-q4_k_m",
    name: "LFM2 350M Q4_K_M",
    url: lfm2URL,
    framework: .llamaCpp,
    memoryRequirement: 300_000_000
)

// STT model (tar.gz archive)
RunAnywhere.registerModel(
    id: "sherpa-onnx-whisper-tiny.en",
    name: "Sherpa Whisper Tiny",
    url: whisperTinyURL,
    framework: .onnx,
    modality: .speechRecognition,
    artifactType: .archive(.tarGz, structure: .nestedDirectory),
    memoryRequirement: 75_000_000
)

// TTS voice model (tar.gz archive)
RunAnywhere.registerModel(
    id: "vits-piper-en_US-lessac-medium",
    name: "Piper TTS English",
    url: piperLessacURL,
    framework: .onnx,
    modality: .speechSynthesis,
    artifactType: .archive(.tarGz, structure: .nestedDirectory),
    memoryRequirement: 65_000_000
)
For multi-file models (e.g., VLM with a projector):
// A multi-file model lists every file it needs: here the main GGUF weights
// and the projector file.
let smolVLMFiles = [
    ModelFileDescriptor(
        url: URL(string: "https://huggingface.co/.../SmolVLM-Q8_0.gguf")!,
        filename: "SmolVLM-Q8_0.gguf"
    ),
    ModelFileDescriptor(
        url: URL(string: "https://huggingface.co/.../mmproj-f16.gguf")!,
        filename: "mmproj-f16.gguf"
    ),
]

RunAnywhere.registerMultiFileModel(
    id: "smolvlm-256m-instruct",
    name: "SmolVLM 256M Instruct",
    files: smolVLMFiles,
    framework: .llamaCpp,
    modality: .multimodal,
    memoryRequirement: 365_000_000
)
Step 4: Download and Load Models
Download models with progress tracking, then load into memory:
// Download with progress: print each update and stop once the completed stage is reported.
let progressStream = try await RunAnywhere.downloadModel(modelId: "lfm2-350m-q4_k_m")
for await update in progressStream {
    let percent = Int(update.overallProgress * 100)
    print("Download: \(percent) %")
    guard update.stage != .completed else { break }
}

// Load the model
try await RunAnywhere.loadModel(modelId: "lfm2-350m-q4_k_m")

// Check if loaded
let isLoaded = await RunAnywhere.isModelLoaded
print("Model loaded: \(isLoaded) ")
The SDK caches downloaded models. On subsequent launches, loadModel() succeeds immediately
without re-downloading. Use a try-then-download pattern: attempt loadModel() first, and only
call downloadModel() if it fails.
Step 5: Generate Text
Simple Chat
// One-shot chat: send a question and print the reply.
let question = "What is the capital of France?"
let response = try await RunAnywhere.chat(question)
print(response) // "The capital of France is Paris."
Full Generation with Metrics
// Generation settings: cap the output at 200 tokens, sampling temperature 0.7.
let options = LLMGenerationOptions(maxTokens: 200, temperature: 0.7)

let result = try await RunAnywhere.generate(
    "Explain quantum computing in simple terms",
    options: options
)

// The result carries the generated text together with usage and timing metrics.
print("Response: \(result.text) ")
print("Tokens used: \(result.tokensUsed) ")
print("Speed: \(result.tokensPerSecond) tok/s")
print("Latency: \(result.latencyMs) ms")
Complete SwiftUI Example
Here’s a full SwiftUI view demonstrating text generation:
import SwiftUI
import RunAnywhere
/// Minimal prompt/response screen for on-device text generation.
///
/// When the view appears it tries to load the `lfm2-350m-q4_k_m` model,
/// downloading it first if the initial load fails. The Generate button stays
/// disabled until the model is loaded, a prompt has been entered, and no
/// generation is in flight.
struct ContentView: View {
    /// Identifier of the model this screen loads and generates with.
    private static let modelID = "lfm2-350m-q4_k_m"

    @State private var prompt = ""
    @State private var response = ""
    @State private var isLoading = false
    @State private var isModelLoaded = false

    /// True when the model is loaded, nothing is generating, and the prompt is non-empty.
    private var canGenerate: Bool {
        isModelLoaded && !isLoading && !prompt.isEmpty
    }

    var body: some View {
        VStack(spacing: 20) {
            modelStatus
            promptField
            generateButton
            responseArea
        }
        .padding()
        .task {
            await loadModel()
        }
    }

    /// Model status: a colored dot plus a caption.
    private var modelStatus: some View {
        HStack {
            Circle()
                .fill(isModelLoaded ? .green : .gray)
                .frame(width: 10, height: 10)
            Text(isModelLoaded ? "Model Ready" : "Model Not Loaded")
                .font(.caption)
        }
    }

    /// Input field bound to `prompt`.
    private var promptField: some View {
        TextField("Enter your prompt...", text: $prompt)
            .textFieldStyle(.roundedBorder)
            .padding(.horizontal)
    }

    /// Generate button; shows a spinner and an alternate title while generating.
    private var generateButton: some View {
        Button {
            Task { await generate() }
        } label: {
            HStack {
                if isLoading {
                    ProgressView()
                        .scaleEffect(0.8)
                }
                Text(isLoading ? "Generating..." : "Generate")
            }
            .frame(maxWidth: .infinity)
            .padding()
            .background(Color.blue)
            .foregroundColor(.white)
            .cornerRadius(10)
        }
        .disabled(!canGenerate)
        .padding(.horizontal)
    }

    /// Scrollable area showing the latest response or error message.
    private var responseArea: some View {
        ScrollView {
            Text(response)
                .padding()
                .frame(maxWidth: .infinity, alignment: .leading)
        }
        .background(Color.gray.opacity(0.1))
        .cornerRadius(10)
        .padding(.horizontal)
    }

    /// Loads the model, falling back to download-then-load when the first
    /// attempt fails. Sets `isModelLoaded` on success; if the fallback also
    /// fails, the error message is written to `response`.
    func loadModel() async {
        do {
            // Try loading (succeeds if already downloaded)
            try await RunAnywhere.loadModel(modelId: Self.modelID)
        } catch {
            // Download first, then load
            do {
                try await downloadThenLoad()
            } catch {
                response = "Failed to load model: \(error.localizedDescription) "
                return
            }
        }
        isModelLoaded = true
    }

    /// Downloads the model, consumes progress updates until the completed
    /// stage is reported (or the stream ends), then loads the model.
    private func downloadThenLoad() async throws {
        let updates = try await RunAnywhere.downloadModel(modelId: Self.modelID)
        for await update in updates {
            guard update.stage != .completed else { break }
        }
        try await RunAnywhere.loadModel(modelId: Self.modelID)
    }

    /// Generates a completion for the current prompt and shows its text, or
    /// the error message if generation throws. `isLoading` is set for the
    /// duration of the call.
    func generate() async {
        isLoading = true
        defer { isLoading = false }

        let options = LLMGenerationOptions(maxTokens: 200, temperature: 0.7)
        do {
            response = try await RunAnywhere.generate(prompt, options: options).text
        } catch {
            response = "Error: \(error.localizedDescription) "
        }
    }
}
Streaming Example
For a more responsive UI, use streaming generation:
/// Streams a completion for `prompt`, appending each token to `response` as it
/// arrives, then prints the final token count and speed.
///
/// `isLoading` is set for the duration of the call. If anything throws, the
/// error message replaces `response`.
func generateStreaming() async {
    isLoading = true
    defer { isLoading = false }
    response = ""

    do {
        let streaming = try await RunAnywhere.generateStream(
            prompt,
            options: LLMGenerationOptions(maxTokens: 500)
        )

        // Display tokens as they arrive
        for try await piece in streaming.stream {
            await MainActor.run {
                response += piece
            }
        }

        // Get final metrics
        let summary = try await streaming.result.value
        print("Generated \(summary.tokensUsed) tokens at \(summary.tokensPerSecond) tok/s")
    } catch {
        response = "Error: \(error.localizedDescription) "
    }
}
What’s Next?
- LLM Guide: Learn about text generation options
- Speech-to-Text: Add voice transcription
- Text-to-Speech: Generate speech from text
- Voice Agent: Build complete voice experiences