Overview
Token streaming allows you to display AI responses as they’re generated, token by token. This provides a much better user experience than waiting for the entire response, especially for longer outputs.

Basic Usage
Copy
Ask AI
import { RunAnywhere } from '@runanywhere/core'

// Start a streaming generation; tokens arrive through an async iterable.
const streaming = await RunAnywhere.generateStream('Write a short story about a robot', {
  maxTokens: 200,
})

// Build the response incrementally so the UI can show partial output.
let text = ''
for await (const chunk of streaming.stream) {
  text += chunk
  setResponse(text) // Update UI state
}

// Once the stream is exhausted, the result promise carries the final metrics.
const summary = await streaming.result
console.log('Speed:', summary.tokensPerSecond, 'tok/s')
API Reference
Copy
Ask AI
await RunAnywhere.generateStream(
prompt: string,
options?: GenerationOptions
): Promise<LLMStreamingResult>
Parameters
Same as generate() — see Generation Options.
Returns
Copy
Ask AI
/**
 * Handle returned by generateStream(): consume `stream` for live tokens,
 * await `result` for the final text and metrics, or call `cancel` to stop
 * the ongoing generation.
 */
interface LLMStreamingResult {
/** Async iterator yielding tokens one at a time */
stream: AsyncIterable<string>
/** Promise resolving to final result with metrics */
result: Promise<LLMGenerationResult>
/** Cancel the ongoing generation */
cancel: () => void
}
/**
 * Final outcome of a streaming generation. Unlike GenerationResult from
 * generate(), throughput fields such as tokensPerSecond live at the top
 * level rather than under performanceMetrics.
 */
interface LLMGenerationResult {
/** Complete generated text */
text: string
/** Total tokens used (presumably input + output — TODO confirm) */
tokensUsed: number
/** Identifier of the model that served the request */
modelUsed: string
/** End-to-end generation latency in milliseconds */
latencyMs: number
/** Inference framework that ran the model (exact values not shown here — verify) */
framework: string
/** Generation throughput in tokens per second */
tokensPerSecond: number
/** Time until the first token arrived, in milliseconds (when measured) */
timeToFirstTokenMs?: number
/** Number of tokens in the response */
responseTokens: number
/** Model "thinking"/reasoning text, when the model emits it */
thinkingContent?: string
/** Token count of the thinking content */
thinkingTokens: number
/** Token count of the prompt/input */
inputTokens: number
}
LLMGenerationResult (returned by streaming) differs from GenerationResult (returned by generate()). Streaming results have tokensPerSecond at the top level, while GenerationResult nests it under performanceMetrics.

Examples
React Native Component
StreamingChat.tsx
Copy
Ask AI
import React, { useState, useCallback } from 'react'
import { View, Text, Button, ScrollView } from 'react-native'
import { RunAnywhere } from '@runanywhere/core'

/**
 * Streams a generation token-by-token into a scrollable text view, showing a
 * blinking-cursor glyph while tokens arrive and throughput metrics once the
 * stream completes. Errors are surfaced in the response area.
 */
export function StreamingChat() {
  const [response, setResponse] = useState('')
  const [isStreaming, setIsStreaming] = useState(false)
  const [metrics, setMetrics] = useState<string>('')

  const handleStream = useCallback(async () => {
    // Reset UI state before starting a new stream.
    setResponse('')
    setMetrics('')
    setIsStreaming(true)
    try {
      const streamResult = await RunAnywhere.generateStream('Explain how neural networks work', {
        maxTokens: 300,
        temperature: 0.7,
      })
      // Accumulate tokens and push each partial result to the UI.
      let fullText = ''
      for await (const token of streamResult.stream) {
        fullText += token
        setResponse(fullText)
      }
      // Show final metrics once the stream is done.
      const final = await streamResult.result
      setMetrics(
        `${final.tokensPerSecond?.toFixed(1)} tok/s | ` +
        `${final.latencyMs}ms | ${final.tokensUsed} tokens`
      )
    } catch (error) {
      setResponse('Error: ' + (error as Error).message)
    } finally {
      // Always re-enable the button, even on failure.
      setIsStreaming(false)
    }
  }, [])

  return (
    <View style={{ flex: 1, padding: 16 }}>
      <Button
        title={isStreaming ? 'Streaming...' : 'Start Streaming'}
        onPress={handleStream}
        disabled={isStreaming}
      />
      <ScrollView style={{ flex: 1, marginTop: 16 }}>
        <Text style={{ fontSize: 16, lineHeight: 24 }}>
          {response}
          {isStreaming && <Text style={{ opacity: 0.5 }}>▊</Text>}
        </Text>
      </ScrollView>
      {/* Bug fix: `{metrics && <Text/>}` evaluates to the raw string '' when
          metrics is empty, and React Native throws on text rendered outside a
          <Text> component. Compare explicitly instead of relying on truthiness. */}
      {metrics !== '' && (
        <Text style={{ marginTop: 8, color: '#666', fontSize: 12 }}>{metrics}</Text>
      )}
    </View>
  )
}
Custom Streaming Hook
useStreamingGenerate.ts
Copy
Ask AI
import { useState, useCallback, useRef } from 'react'
import { RunAnywhere, GenerationOptions, LLMGenerationResult } from '@runanywhere/core'

/** State snapshot exposed by useStreamingGenerate. */
interface StreamingState {
  text: string
  isStreaming: boolean
  error: Error | null
  metrics: LLMGenerationResult | null
}

/**
 * React hook wrapping RunAnywhere.generateStream. Exposes the accumulated
 * text, streaming/error status, the final metrics, and stream/cancel actions.
 */
export function useStreamingGenerate() {
  const [state, setState] = useState<StreamingState>({
    text: '',
    isStreaming: false,
    error: null,
    metrics: null,
  })
  // Flag checked between tokens so a cancel stops accumulation promptly.
  const cancelledRef = useRef(false)

  const stream = useCallback(async (prompt: string, options?: GenerationOptions) => {
    cancelledRef.current = false
    setState({ text: '', isStreaming: true, error: null, metrics: null })
    try {
      const streaming = await RunAnywhere.generateStream(prompt, options)
      let buffer = ''
      for await (const chunk of streaming.stream) {
        if (cancelledRef.current) break
        buffer += chunk
        setState((prev) => ({ ...prev, text: buffer }))
      }
      // The result promise resolves with the final metrics once generation ends.
      const summary = await streaming.result
      setState((prev) => ({ ...prev, isStreaming: false, metrics: summary }))
      return summary
    } catch (err) {
      const failure = err instanceof Error ? err : new Error('Streaming failed')
      setState((prev) => ({ ...prev, isStreaming: false, error: failure }))
      throw failure
    }
  }, [])

  const cancel = useCallback(async () => {
    cancelledRef.current = true
    // NOTE(review): this uses the global cancelGeneration() rather than the
    // per-stream cancel() on LLMStreamingResult — presumably equivalent; verify.
    await RunAnywhere.cancelGeneration()
    setState((prev) => ({ ...prev, isStreaming: false }))
  }, [])

  return { ...state, stream, cancel }
}
With Typing Animation Effect
TypingEffect.tsx
Copy
Ask AI
import React, { useState, useEffect, useCallback } from 'react'
import { Text, View, Button } from 'react-native'
import { RunAnywhere } from '@runanywhere/core'

/**
 * Collects all tokens from a streamed generation, then replays them with a
 * fixed 20ms-per-token typewriter animation.
 *
 * Note: tokens are animated only after the stream finishes collecting; the
 * cursor glyph shown while isStreaming is true covers the collection phase.
 */
export function TypingEffect() {
  const [tokens, setTokens] = useState<string[]>([])
  const [displayedText, setDisplayedText] = useState('')
  const [isStreaming, setIsStreaming] = useState(false)

  // Replay the collected tokens with a slight delay for a typing effect.
  useEffect(() => {
    if (tokens.length === 0) return
    let index = 0
    const interval = setInterval(() => {
      if (index < tokens.length) {
        setDisplayedText(tokens.slice(0, index + 1).join(''))
        index++
      } else {
        clearInterval(interval)
      }
    }, 20) // 20ms per token
    return () => clearInterval(interval)
  }, [tokens])

  const handleGenerate = useCallback(async () => {
    setTokens([])
    setDisplayedText('')
    setIsStreaming(true)
    // Bug fix: the original had no try/finally, so a rejected generateStream
    // (or a stream error) left isStreaming stuck at true and the button
    // permanently disabled.
    try {
      const result = await RunAnywhere.generateStream('What makes a good programmer?', {
        maxTokens: 150,
      })
      const allTokens: string[] = []
      for await (const token of result.stream) {
        allTokens.push(token)
      }
      setTokens(allTokens)
    } finally {
      setIsStreaming(false)
    }
  }, [])

  return (
    <View style={{ padding: 16 }}>
      <Button
        title={isStreaming ? 'Generating...' : 'Generate'}
        onPress={handleGenerate}
        disabled={isStreaming}
      />
      <Text style={{ marginTop: 16, fontSize: 16 }}>
        {displayedText}
        {isStreaming && '▊'}
      </Text>
    </View>
  )
}
Event-Based Streaming
You can also use the EventBus for streaming:

Copy
Ask AI
// Bug fix: EventBus was imported but never used — the event API is reached
// through RunAnywhere.events.
import { RunAnywhere } from '@runanywhere/core'

// Subscribe to generation events
const unsubscribe = RunAnywhere.events.onGeneration((event) => {
  switch (event.type) {
    case 'started':
      console.log('Generation started')
      break
    case 'tokenGenerated':
      // Append token to your UI state
      setResponse((prev) => prev + event.token)
      break
    case 'completed':
      console.log('Done:', event.response.tokensUsed, 'tokens')
      break
    case 'failed':
      console.error('Error:', event.error)
      break
    case 'cancelled':
      console.log('Generation cancelled')
      break
  }
})

// Generate (tokens will be emitted via events)
await RunAnywhere.generate('Write a poem', { maxTokens: 100 })

// Clean up — always unsubscribe to avoid leaking the listener
unsubscribe()
Performance Tips
Streaming has minimal overhead compared to non-streaming generation. The time-to-first-token
(TTFT) is the same, and total generation time is nearly identical.
Optimize UI Updates
For very fast generation, batch UI updates to avoid overwhelming React:

Copy
Ask AI
import { useCallback, useRef, useState } from 'react'

/**
 * Batches rapid token appends into at most one state update per animation
 * frame (~60fps) so fast token streams don't flood React with re-renders.
 *
 * NOTE(review): the `delay` parameter is currently unused — pacing comes from
 * requestAnimationFrame, not a timer. Kept for interface compatibility;
 * either wire it up or remove it at the next breaking change.
 */
function useThrottledText(delay = 16) {
  // ~60fps
  const [text, setText] = useState('')
  // Text accumulated since the last flush.
  const pendingRef = useRef('')
  // Pending requestAnimationFrame id, or undefined when no flush is scheduled.
  const frameRef = useRef<number>()

  const appendToken = useCallback((token: string) => {
    pendingRef.current += token
    // Schedule a flush only if one isn't already pending.
    // Bug fix: compare against undefined instead of truthiness — an rAF id is
    // a number and 0 is falsy, which would cause a duplicate schedule.
    if (frameRef.current === undefined) {
      frameRef.current = requestAnimationFrame(() => {
        setText(pendingRef.current)
        frameRef.current = undefined
      })
    }
  }, [])

  const reset = useCallback(() => {
    pendingRef.current = ''
    setText('')
    // Drop any scheduled flush so stale text can't overwrite the reset.
    if (frameRef.current !== undefined) {
      cancelAnimationFrame(frameRef.current)
      frameRef.current = undefined
    }
  }, [])

  return { text, appendToken, reset }
}