The generateStream() method enables real-time token streaming, which is ideal for responsive chat interfaces where text appears progressively.
Basic Usage
let result = try await RunAnywhere.generateStream(
    "Tell me a story about a brave knight",
    options: LLMGenerationOptions(maxTokens: 500)
)

// Display tokens as they arrive
for try await token in result.stream {
    print(token, terminator: "")
}

// Get final metrics after streaming completes
let metrics = try await result.result.value
print("\n\nGenerated \(metrics.tokensUsed) tokens at \(metrics.tokensPerSecond) tok/s")
Method Signature
public static func generateStream(
    _ prompt: String,
    options: LLMGenerationOptions? = nil
) async throws -> LLMStreamingResult
Parameters
| Parameter | Type | Description |
|---|---|---|
| prompt | String | The text prompt |
| options | LLMGenerationOptions? | Generation configuration (optional) |
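When options is omitted, generation runs with the SDK's default settings. A minimal sketch (the prompt text is illustrative):

// Omitting options falls back to the SDK's default generation settings
let result = try await RunAnywhere.generateStream("Summarize this paragraph.")
for try await token in result.stream {
    print(token, terminator: "")
}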
Returns
An LLMStreamingResult containing:
public struct LLMStreamingResult: Sendable {
    // Async stream of tokens
    public let stream: AsyncThrowingStream<String, Error>

    // Task that completes with final metrics
    public let result: Task<LLMGenerationResult, Error>
}
SwiftUI Integration
Basic Streaming View
struct StreamingView: View {
    @State private var prompt = ""
    @State private var response = ""
    @State private var isStreaming = false
    @State private var tokensPerSecond: Double = 0

    var body: some View {
        VStack(spacing: 16) {
            // Prompt input
            TextField("Enter your prompt...", text: $prompt)
                .textFieldStyle(.roundedBorder)

            // Generate / stop button: starts a generation, or cancels the one in flight
            Button(action: {
                if isStreaming {
                    Task { await RunAnywhere.cancelGeneration() }
                } else {
                    Task { await generate() }
                }
            }) {
                Label(
                    isStreaming ? "Stop" : "Generate",
                    systemImage: isStreaming ? "stop.fill" : "play.fill"
                )
            }
            .disabled(prompt.isEmpty && !isStreaming)

            // Streaming response
            ScrollView {
                Text(response)
                    .frame(maxWidth: .infinity, alignment: .leading)
            }
            .frame(maxHeight: 300)
            .background(Color.gray.opacity(0.1))
            .cornerRadius(8)

            // Metrics
            if tokensPerSecond > 0 {
                Text("\(String(format: "%.1f", tokensPerSecond)) tokens/sec")
                    .font(.caption)
                    .foregroundColor(.secondary)
            }
        }
        .padding()
    }

    func generate() async {
        response = ""
        isStreaming = true
        do {
            let result = try await RunAnywhere.generateStream(
                prompt,
                options: LLMGenerationOptions(maxTokens: 500)
            )

            // Stream tokens to UI
            for try await token in result.stream {
                await MainActor.run {
                    response += token
                }
            }

            // Get final metrics
            let metrics = try await result.result.value
            await MainActor.run {
                tokensPerSecond = metrics.tokensPerSecond
                isStreaming = false
            }
        } catch {
            await MainActor.run {
                response = "Error: \(error.localizedDescription)"
                isStreaming = false
            }
        }
    }
}
With Typing Animation Effect
class StreamingViewModel: ObservableObject {
    @Published var displayedText = ""
    @Published var isStreaming = false
    @Published var metrics: LLMGenerationResult?

    private var fullText = ""

    func stream(prompt: String) async {
        await MainActor.run {
            displayedText = ""
            fullText = ""
            isStreaming = true
            metrics = nil
        }
        do {
            let result = try await RunAnywhere.generateStream(
                prompt,
                options: LLMGenerationOptions(maxTokens: 500, temperature: 0.7)
            )
            for try await token in result.stream {
                fullText += token
                // Update UI on main thread
                await MainActor.run {
                    displayedText = fullText
                }
            }
            let finalMetrics = try await result.result.value
            await MainActor.run {
                metrics = finalMetrics
                isStreaming = false
            }
        } catch {
            await MainActor.run {
                displayedText = "Error: \(error.localizedDescription)"
                isStreaming = false
            }
        }
    }

    func cancel() async {
        await RunAnywhere.cancelGeneration()
        await MainActor.run {
            isStreaming = false
        }
    }
}
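The view model above streams raw text; the typing feel comes from animating each text change in the view that consumes it. A minimal sketch of such a view (TypingView and the hard-coded prompt are illustrative, not part of the SDK):

import SwiftUI

// Hypothetical companion view: animates each displayedText update
struct TypingView: View {
    @StateObject private var model = StreamingViewModel()

    var body: some View {
        ScrollView {
            Text(model.displayedText)
                .frame(maxWidth: .infinity, alignment: .leading)
                .animation(.easeOut(duration: 0.1), value: model.displayedText)
        }
        .task {
            await model.stream(prompt: "Tell me a story about a brave knight")
        }
    }
}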
Cancellation
Cancel streaming mid-generation:
// Start streaming in a task
let streamTask = Task {
    let result = try await RunAnywhere.generateStream(prompt)
    for try await token in result.stream {
        print(token, terminator: "")
    }
}

// Cancel after 5 seconds
try await Task.sleep(for: .seconds(5))
await RunAnywhere.cancelGeneration()
streamTask.cancel()
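In SwiftUI the same pattern usually hangs off a stop button. A sketch reusing streamTask from above (the button itself is illustrative):

// Hypothetical stop button: cancels the SDK generation and the local task
Button("Stop") {
    Task {
        await RunAnywhere.cancelGeneration()
        streamTask.cancel()
    }
}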
Error Handling
do {
    let result = try await RunAnywhere.generateStream(prompt)
    for try await token in result.stream {
        // Handle each token
        await updateUI(with: token)
    }
    // Stream completed successfully
    let metrics = try await result.result.value
} catch is CancellationError {
    print("Generation was cancelled")
} catch let error as SDKError {
    switch error.code {
    case .generationFailed:
        print("Generation failed: \(error.message)")
    case .streamingNotSupported:
        print("Model doesn't support streaming")
    default:
        print("Error: \(error.localizedDescription)")
    }
} catch {
    // Catch-all keeps the do-catch exhaustive in non-throwing contexts
    print("Unexpected error: \(error.localizedDescription)")
}
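For transient failures it can be worth retrying once before surfacing the error. A minimal sketch (generateWithRetry is a hypothetical helper, not an SDK API):

// Hypothetical wrapper: collects the stream, retrying transient failures
func generateWithRetry(_ prompt: String, retries: Int = 1) async throws -> String {
    var attempt = 0
    while true {
        do {
            let result = try await RunAnywhere.generateStream(prompt)
            var text = ""
            for try await token in result.stream { text += token }
            return text
        } catch let error where attempt < retries && !(error is CancellationError) {
            attempt += 1 // retry everything except explicit cancellation
        }
    }
}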
Check Streaming Support
Not all models support streaming. Check before calling:
if await RunAnywhere.supportsLLMStreaming {
    let result = try await RunAnywhere.generateStream(prompt)
    // ... handle stream
} else {
    // Fall back to non-streaming
    let result = try await RunAnywhere.generate(prompt)
    print(result.text)
}
Performance Tips
Batch UI updates
Instead of updating the UI for every token, batch updates:
var buffer = ""
var lastUpdate = Date()

for try await token in result.stream {
    buffer += token
    // Update UI every 50ms
    if Date().timeIntervalSince(lastUpdate) > 0.05 {
        await MainActor.run { displayedText += buffer }
        buffer = ""
        lastUpdate = Date()
    }
}

// Flush remaining buffer
if !buffer.isEmpty {
    await MainActor.run { displayedText += buffer }
}
Use AttributedString for Markdown
For Markdown rendering, consider re-parsing the accumulated text periodically rather than on every token, as sketched below.
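A minimal sketch, assuming a displayedMarkdown property bound to a Text view elsewhere (the every-20-tokens cadence is arbitrary):

var accumulated = ""
var tokenCount = 0

for try await token in result.stream {
    accumulated += token
    tokenCount += 1
    // Re-parse roughly every 20 tokens instead of on each one
    if tokenCount % 20 == 0 {
        let rendered = (try? AttributedString(markdown: accumulated)) ?? AttributedString(accumulated)
        // displayedMarkdown: assumed UI-bound AttributedString property
        await MainActor.run { displayedMarkdown = rendered }
    }
}

// Final parse so trailing tokens are rendered
let rendered = (try? AttributedString(markdown: accumulated)) ?? AttributedString(accumulated)
await MainActor.run { displayedMarkdown = rendered }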
Memory management
For very long generations, consider trimming the displayed history to prevent memory issues, as in the sketch below.
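One approach is a cap on how many characters stay on screen (maxDisplayedCharacters and trimmed(_:) are hypothetical):

// Hypothetical cap: keep only the most recent characters on screen
let maxDisplayedCharacters = 20_000

func trimmed(_ text: String) -> String {
    guard text.count > maxDisplayedCharacters else { return text }
    return "…" + text.suffix(maxDisplayedCharacters)
}

// Apply after each append
await MainActor.run {
    displayedText = trimmed(displayedText)
}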
Complete Chat Example
struct ChatMessage: Identifiable {
    let id = UUID()
    let role: Role
    var content: String

    enum Role {
        case user, assistant
    }
}

// Main-actor isolation keeps all UI-bound state mutations on the main thread
@MainActor
@Observable
class ChatViewModel {
    var messages: [ChatMessage] = []
    var isStreaming = false

    func send(_ text: String) async {
        // Add user message
        messages.append(ChatMessage(role: .user, content: text))

        // Add placeholder for assistant
        messages.append(ChatMessage(role: .assistant, content: ""))
        let messageIndex = messages.count - 1
        isStreaming = true

        do {
            let result = try await RunAnywhere.generateStream(
                text,
                options: LLMGenerationOptions(maxTokens: 500)
            )
            for try await token in result.stream {
                messages[messageIndex].content += token
            }
        } catch {
            messages[messageIndex].content = "Error: \(error.localizedDescription)"
        }
        isStreaming = false
    }
}
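A thin SwiftUI surface over the view model might look like this (ChatView and its layout are illustrative, not part of the SDK):

import SwiftUI

// Hypothetical chat screen driving the view model above
struct ChatView: View {
    @State private var viewModel = ChatViewModel()
    @State private var draft = ""

    var body: some View {
        VStack {
            List(viewModel.messages) { message in
                Text(message.content)
                    .frame(
                        maxWidth: .infinity,
                        alignment: message.role == .user ? .trailing : .leading
                    )
            }
            HStack {
                TextField("Message...", text: $draft)
                    .textFieldStyle(.roundedBorder)
                Button("Send") {
                    let text = draft
                    draft = ""
                    Task { await viewModel.send(text) }
                }
                .disabled(draft.isEmpty || viewModel.isStreaming)
            }
            .padding()
        }
    }
}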