Complete Example

Here’s a complete example to get you started with on-device text generation:
import RunAnywhere
import LlamaCPPRuntime

@main
struct MyApp: App {
    init() {
        Task { @MainActor in
            // Register the LlamaCPP module for LLM support
            LlamaCPP.register()

            // Initialize the SDK
            do {
                try RunAnywhere.initialize(
                    apiKey: "<YOUR_API_KEY>",
                    baseURL: "https://api.runanywhere.ai",
                    environment: .production
                )
            } catch {
                print("SDK initialization failed: \(error)")
            }
        }
    }

    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}

Step 1: Initialize the SDK

Initialize the SDK once at app launch:
try RunAnywhere.initialize(
    apiKey: "<YOUR_API_KEY>",       // Required for production
    baseURL: "https://api.runanywhere.ai",
    environment: .production        // .development | .staging | .production
)

Environment Options

Environment    Log Level   Description
.development   Debug       Verbose logging, mock services, local analytics
.staging       Info        Testing with real services
.production    Warning     Minimal logging, full authentication, telemetry
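
A common pattern is to pick the environment at build time rather than hard-coding it. As a minimal sketch, using only the initializer shown above plus Swift's DEBUG compilation flag:
#if DEBUG
// Debug builds: verbose logging and mock services
try RunAnywhere.initialize(
    apiKey: "<YOUR_API_KEY>",
    baseURL: "https://api.runanywhere.ai",
    environment: .development
)
#else
// Release builds: minimal logging, full authentication
try RunAnywhere.initialize(
    apiKey: "<YOUR_API_KEY>",
    baseURL: "https://api.runanywhere.ai",
    environment: .production
)
#endif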

Step 2: Register Modules

Register modules at app startup before using their capabilities:
import RunAnywhere
import LlamaCPPRuntime
import ONNXRuntime

@MainActor
func setupSDK() {
    LlamaCPP.register()   // LLM (priority: 100)
    ONNX.register()       // STT + TTS + VAD (priority: 100)
}
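
Modules are independent, so register only what your app actually uses. For example, a voice-only app could skip the LLM runtime entirely (a sketch built from the same modules shown above):
import RunAnywhere
import ONNXRuntime

@MainActor
func setupVoiceOnlySDK() {
    // Voice features only: STT/TTS/VAD come from ONNX, no LLM module needed
    ONNX.register()
}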

Step 3: Generate Text

Simple Chat

let response = try await RunAnywhere.chat("What is the capital of France?")
print(response)  // "The capital of France is Paris."
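
Like the other SDK calls, chat can throw, so production code should handle failure. A minimal sketch using only the call above:
do {
    let answer = try await RunAnywhere.chat("What is the capital of France?")
    print(answer)
} catch {
    print("Chat failed: \(error)")
}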

Full Generation with Metrics

let result = try await RunAnywhere.generate(
    "Explain quantum computing in simple terms",
    options: LLMGenerationOptions(
        maxTokens: 200,
        temperature: 0.7
    )
)

print("Response: \(result.text)")
print("Tokens used: \(result.tokensUsed)")
print("Speed: \(result.tokensPerSecond) tok/s")
print("Latency: \(result.latencyMs)ms")
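
Temperature controls sampling randomness: lower values give more deterministic output, higher values more variety. As a sketch (assuming LLMGenerationOptions accepts a temperature of 0.0, per the initializer above), a zero temperature is useful for repeatable answers:
let deterministic = try await RunAnywhere.generate(
    "List the three primary colors",
    options: LLMGenerationOptions(
        maxTokens: 50,
        temperature: 0.0   // near-greedy decoding for repeatable output
    )
)
print(deterministic.text)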

Step 4: Load Models

// Load an LLM model by ID
try await RunAnywhere.loadModel("llama-3.2-1b-instruct-q4")

// Check if model is loaded
let isLoaded = await RunAnywhere.isModelLoaded
print("Model loaded: \(isLoaded)")

// Get current model
let modelId = await RunAnywhere.getCurrentModelId()
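
Loading a large model takes time, so it is worth skipping redundant loads. Here is a hypothetical ensureModelLoaded helper (not part of the SDK) composed only from the calls shown above:
func ensureModelLoaded(_ modelId: String) async throws {
    // Skip the load if this model is already in memory
    if await RunAnywhere.isModelLoaded,
       await RunAnywhere.getCurrentModelId() == modelId {
        return
    }
    try await RunAnywhere.loadModel(modelId)
}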

Complete SwiftUI Example

Here’s a full SwiftUI view demonstrating text generation:
import SwiftUI
import RunAnywhere

struct ContentView: View {
    @State private var prompt = ""
    @State private var response = ""
    @State private var isLoading = false
    @State private var isModelLoaded = false

    var body: some View {
        VStack(spacing: 20) {
            // Model status
            HStack {
                Circle()
                    .fill(isModelLoaded ? .green : .gray)
                    .frame(width: 10, height: 10)
                Text(isModelLoaded ? "Model Ready" : "Model Not Loaded")
                    .font(.caption)
            }

            // Input
            TextField("Enter your prompt...", text: $prompt)
                .textFieldStyle(.roundedBorder)
                .padding(.horizontal)

            // Generate button
            Button(action: { Task { await generate() } }) {
                HStack {
                    if isLoading {
                        ProgressView()
                            .scaleEffect(0.8)
                    }
                    Text(isLoading ? "Generating..." : "Generate")
                }
                .frame(maxWidth: .infinity)
                .padding()
                .background(Color.blue)
                .foregroundColor(.white)
                .cornerRadius(10)
            }
            .disabled(isLoading || prompt.isEmpty || !isModelLoaded)
            .padding(.horizontal)

            // Response
            ScrollView {
                Text(response)
                    .padding()
                    .frame(maxWidth: .infinity, alignment: .leading)
            }
            .background(Color.gray.opacity(0.1))
            .cornerRadius(10)
            .padding(.horizontal)
        }
        .padding()
        .task {
            await loadModel()
        }
    }

    @MainActor
    func loadModel() async {
        do {
            try await RunAnywhere.loadModel("llama-3.2-1b-instruct-q4")
            isModelLoaded = true
        } catch {
            response = "Failed to load model: \(error.localizedDescription)"
        }
    }

    @MainActor
    func generate() async {
        isLoading = true
        defer { isLoading = false }

        do {
            let result = try await RunAnywhere.generate(
                prompt,
                options: LLMGenerationOptions(
                    maxTokens: 200,
                    temperature: 0.7
                )
            )
            response = result.text
        } catch {
            response = "Error: \(error.localizedDescription)"
        }
    }
}

Streaming Example

For a more responsive UI, use streaming generation:
@MainActor
func generateStreaming() async {
    isLoading = true
    response = ""

    do {
        let result = try await RunAnywhere.generateStream(
            prompt,
            options: LLMGenerationOptions(maxTokens: 500)
        )

        // Display tokens as they arrive; the function is MainActor-isolated,
        // so state updates already happen on the main actor
        for try await token in result.stream {
            response += token
        }

        // Get final metrics
        let metrics = try await result.result.value
        print("Generated \(metrics.tokensUsed) tokens at \(metrics.tokensPerSecond) tok/s")

    } catch {
        response = "Error: \(error.localizedDescription)"
    }

    isLoading = false
}
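
Because streaming runs inside a Task, you can also let users cancel a long generation, for example from a Stop button. A minimal sketch (assuming the token stream cooperates with standard Swift task cancellation):
@State private var generationTask: Task<Void, Never>?

func startGeneration() {
    generationTask = Task { await generateStreaming() }
}

func stopGeneration() {
    // Cancelling the task ends the for-await loop over the stream
    generationTask?.cancel()
}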

What’s Next?