The generate() method runs a single text-generation request with customizable options and returns the generated text along with detailed performance metrics.

Basic Usage

let result = try await RunAnywhere.generate(
    "Explain quantum computing in simple terms",
    options: LLMGenerationOptions(
        maxTokens: 200,
        temperature: 0.7
    )
)

print("Response: \(result.text)")
print("Speed: \(result.tokensPerSecond) tok/s")

Method Signature

public static func generate(
    _ prompt: String,
    options: LLMGenerationOptions? = nil
) async throws -> LLMGenerationResult

Parameters

| Parameter | Type | Description |
| --- | --- | --- |
| prompt | String | The text prompt |
| options | LLMGenerationOptions? | Generation configuration (optional) |

Returns

An LLMGenerationResult containing the response and metrics.

LLMGenerationResult

public struct LLMGenerationResult: Sendable {
    public let text: String              // Generated text
    public let thinkingContent: String?  // Reasoning tokens (for thinking models)
    public let inputTokens: Int          // Prompt tokens
    public let tokensUsed: Int           // Total output tokens
    public let modelUsed: String         // Model ID
    public let latencyMs: TimeInterval   // Total generation time
    public let framework: String?        // Backend framework used
    public let tokensPerSecond: Double   // Generation speed
    public let timeToFirstTokenMs: Double? // Time to first token
    public let thinkingTokens: Int?      // Reasoning token count
    public let responseTokens: Int       // Response token count
}

LLMGenerationOptions

let options = LLMGenerationOptions(
    maxTokens: 100,           // Maximum tokens to generate (default: 100)
    temperature: 0.8,         // Randomness 0.0-2.0 (default: 0.8)
    topP: 1.0,                // Nucleus sampling (default: 1.0)
    stopSequences: ["###"],   // Stop generation at these strings
    streamingEnabled: false,  // Enable token streaming
    preferredFramework: .llamaCpp,  // Preferred backend
    systemPrompt: "You are a helpful assistant."
)

Generation Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| maxTokens | Int | 100 | Maximum tokens to generate |
| temperature | Float | 0.8 | Controls randomness (0.0 = deterministic, 2.0 = very random) |
| topP | Float | 1.0 | Nucleus sampling threshold |
| stopSequences | [String] | [] | Stop generation at these strings |
| streamingEnabled | Bool | false | Enable token-by-token streaming |
| preferredFramework | InferenceFramework? | nil | Preferred backend framework |
| systemPrompt | String? | nil | System prompt guiding model behavior |
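
As a rough sketch of how these parameters combine, the snippet below requests a short, focused summary and pins the backend. The .llamaCpp case mirrors the example above (which backends are available depends on your app's configuration), and articleText is a placeholder for your own input string.

// A minimal sketch combining several options from the table above.
// InferenceFramework.llamaCpp follows the earlier example; backend
// availability depends on how your app is configured.
let articleText = "Long text to summarize..."  // placeholder input

let summaryOptions = LLMGenerationOptions(
    maxTokens: 150,
    temperature: 0.3,          // low temperature for focused output
    topP: 0.9,
    stopSequences: ["\n\n"],   // stop at the first blank line
    preferredFramework: .llamaCpp,
    systemPrompt: "Summarize the user's text in two sentences."
)

let summary = try await RunAnywhere.generate(articleText, options: summaryOptions)
print(summary.text)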

Examples

Basic Generation

let result = try await RunAnywhere.generate("Write a haiku about programming")
print(result.text)
print("Generated in \(result.latencyMs)ms")

With Custom Options

let result = try await RunAnywhere.generate(
    "Write a creative story about a robot",
    options: LLMGenerationOptions(
        maxTokens: 500,
        temperature: 1.2,  // More creative
        topP: 0.9,
        stopSequences: ["THE END"]
    )
)

With System Prompt

let result = try await RunAnywhere.generate(
    "What should I cook tonight?",
    options: LLMGenerationOptions(
        maxTokens: 200,
        systemPrompt: "You are a professional chef. Suggest creative recipes with detailed instructions."
    )
)

For Reasoning Models

Some models output their reasoning process. Extract it with thinkingContent:

let result = try await RunAnywhere.generate(
    "Solve: If a train travels 60 mph for 2 hours, how far does it go?",
    options: LLMGenerationOptions(maxTokens: 300)
)

// The main response
print("Answer: \(result.text)")

// The model's reasoning (if available)
if let thinking = result.thinkingContent {
    print("Reasoning: \(thinking)")
}

// Token breakdown
if let thinkingTokens = result.thinkingTokens {
    print("Thinking tokens: \(thinkingTokens)")
}
print("Response tokens: \(result.responseTokens)")

Performance Monitoring

let result = try await RunAnywhere.generate(prompt)

// Performance metrics
print("Model: \(result.modelUsed)")
print("Input tokens: \(result.inputTokens)")
print("Output tokens: \(result.tokensUsed)")
print("Speed: \(String(format: "%.1f", result.tokensPerSecond)) tok/s")
print("Latency: \(String(format: "%.0f", result.latencyMs))ms")

if let ttft = result.timeToFirstTokenMs {
    print("Time to first token: \(String(format: "%.0f", ttft))ms")
}

Structured Output

Generate type-safe structured output using the Generatable protocol:

struct Recipe: Generatable {
    let name: String
    let ingredients: [String]
    let steps: [String]
    let cookingTime: Int

    static var jsonSchema: String {
        """
        {
          "type": "object",
          "properties": {
            "name": { "type": "string" },
            "ingredients": { "type": "array", "items": { "type": "string" } },
            "steps": { "type": "array", "items": { "type": "string" } },
            "cookingTime": { "type": "integer" }
          },
          "required": ["name", "ingredients", "steps", "cookingTime"]
        }
        """
    }
}

let recipe: Recipe = try await RunAnywhere.generateStructured(
    Recipe.self,
    prompt: "Create a simple pasta recipe"
)

print("Recipe: \(recipe.name)")
print("Ingredients: \(recipe.ingredients.joined(separator: ", "))")
print("Cook time: \(recipe.cookingTime) minutes")

Error Handling

do {
    let result = try await RunAnywhere.generate(prompt, options: options)
    print(result.text)
} catch let error as SDKError {
    switch error.code {
    case .notInitialized:
        print("SDK not initialized")
    case .modelNotFound:
        print("Model not loaded")
    case .generationFailed:
        print("Generation failed: \(error.message)")
    case .contextTooLong:
        print("Prompt too long for model's context window")
    default:
        print("Error: \(error.localizedDescription)")
    }
}

Temperature Guide

| Temperature | Use Case |
| --- | --- |
| 0.0 | Deterministic, factual answers |
| 0.3-0.5 | Focused, coherent responses |
| 0.7-0.8 | Balanced creativity (default) |
| 1.0-1.2 | Creative writing, brainstorming |
| 1.5+ | Very random, experimental |
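
For instance, a deterministic setting suits extraction-style questions. This is a minimal sketch using the generate() call documented above:

// temperature 0.0: deterministic, factual answer (see the guide above)
let factual = try await RunAnywhere.generate(
    "What is the capital of France? Answer with only the city name.",
    options: LLMGenerationOptions(
        maxTokens: 20,
        temperature: 0.0
    )
)
print(factual.text)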