Overview
Token streaming allows you to display AI responses as they’re generated, token by token. This provides a much better user experience than waiting for the entire response, especially for longer outputs.
Basic Usage
import { RunAnywhere } from '@runanywhere/core'

const streamResult = await RunAnywhere.generateStream('Write a short story about a robot', {
  maxTokens: 200,
})

// Display tokens as they arrive
for await (const token of streamResult.stream) {
  process.stdout.write(token) // Each token as it's generated
}

// Get final metrics
const finalResult = await streamResult.result
console.log('\nSpeed:', finalResult.performanceMetrics.tokensPerSecond, 'tok/s')
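You can also stop consuming the stream early by breaking out of the loop. A minimal sketch, assuming RunAnywhere.cancelGeneration() (used in the hook example below) also stops the underlying generation:

import { RunAnywhere } from '@runanywhere/core'

const streamResult = await RunAnywhere.generateStream('List every prime under 1000', {
  maxTokens: 500,
})

let received = 0
for await (const token of streamResult.stream) {
  process.stdout.write(token)
  if (++received >= 100) {
    // Stop consuming and cancel the generation still running underneath.
    // Whether streamResult.result then resolves or rejects depends on the SDK.
    await RunAnywhere.cancelGeneration()
    break
  }
}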
API Reference
RunAnywhere.generateStream(
  prompt: string,
  options?: GenerationOptions
): Promise<LLMStreamingResult>
Parameters
Same as generate(); see Generation Options.
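For example, the options used elsewhere on this page carry over unchanged (maxTokens and temperature appear in the examples; other GenerationOptions fields are documented on the Generation Options page):

const streamResult = await RunAnywhere.generateStream('Summarize the release notes', {
  maxTokens: 200, // cap the response length
  temperature: 0.7, // sampling temperature, same meaning as in generate()
})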
Returns
interface LLMStreamingResult {
  /** Async iterator yielding tokens one at a time */
  stream: AsyncIterable<string>

  /** Promise resolving to final GenerationResult with metrics */
  result: Promise<GenerationResult>
}
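Both fields can be destructured up front. A small sketch; note that this page does not specify whether result settles before the stream is fully drained, so await it after the loop:

const { stream, result } = await RunAnywhere.generateStream('Hello', { maxTokens: 50 })

let text = ''
for await (const token of stream) {
  text += token
}

// Await the metrics promise once the stream is exhausted
const final = await result
console.log(text.length, 'chars,', final.tokensUsed, 'tokens')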
Examples
React Native Component
StreamingChat.tsx
import React, { useState, useCallback } from 'react'
import { View, Text, Button, ScrollView } from 'react-native'
import { RunAnywhere } from '@runanywhere/core'

export function StreamingChat() {
  const [response, setResponse] = useState('')
  const [isStreaming, setIsStreaming] = useState(false)
  const [metrics, setMetrics] = useState<string>('')

  const handleStream = useCallback(async () => {
    setResponse('')
    setMetrics('')
    setIsStreaming(true)

    try {
      const streamResult = await RunAnywhere.generateStream('Explain how neural networks work', {
        maxTokens: 300,
        temperature: 0.7,
      })

      // Accumulate tokens
      let fullText = ''
      for await (const token of streamResult.stream) {
        fullText += token
        setResponse(fullText)
      }

      // Show final metrics
      const final = await streamResult.result
      setMetrics(
        `${final.performanceMetrics.tokensPerSecond?.toFixed(1)} tok/s | ` +
          `${final.latencyMs}ms | ${final.tokensUsed} tokens`
      )
    } catch (error) {
      setResponse('Error: ' + (error as Error).message)
    } finally {
      setIsStreaming(false)
    }
  }, [])

  return (
    <View style={{ flex: 1, padding: 16 }}>
      <Button
        title={isStreaming ? 'Streaming...' : 'Start Streaming'}
        onPress={handleStream}
        disabled={isStreaming}
      />
      <ScrollView style={{ flex: 1, marginTop: 16 }}>
        <Text style={{ fontSize: 16, lineHeight: 24 }}>
          {response}
          {isStreaming && <Text style={{ opacity: 0.5 }}>▊</Text>}
        </Text>
      </ScrollView>
      {/* Ternary avoids rendering a bare string outside <Text> when metrics is empty */}
      {metrics ? (
        <Text style={{ marginTop: 8, color: '#666', fontSize: 12 }}>{metrics}</Text>
      ) : null}
    </View>
  )
}
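Note that tokens are accumulated in a local fullText variable and the complete string is passed to setResponse each time; this avoids relying on the previous state value inside the update and keeps the full text available after the loop finishes.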
Custom Streaming Hook
useStreamingGenerate.ts
import { useState, useCallback, useRef } from 'react'
import { RunAnywhere, GenerationOptions, GenerationResult } from '@runanywhere/core'

interface StreamingState {
  text: string
  isStreaming: boolean
  error: Error | null
  metrics: GenerationResult | null
}

export function useStreamingGenerate() {
  const [state, setState] = useState<StreamingState>({
    text: '',
    isStreaming: false,
    error: null,
    metrics: null,
  })
  const abortRef = useRef(false)

  const stream = useCallback(async (prompt: string, options?: GenerationOptions) => {
    abortRef.current = false
    setState({ text: '', isStreaming: true, error: null, metrics: null })

    try {
      const result = await RunAnywhere.generateStream(prompt, options)

      let accumulated = ''
      for await (const token of result.stream) {
        if (abortRef.current) break
        accumulated += token
        setState((s) => ({ ...s, text: accumulated }))
      }

      const finalMetrics = await result.result
      setState((s) => ({ ...s, isStreaming: false, metrics: finalMetrics }))
      return finalMetrics
    } catch (error) {
      const e = error instanceof Error ? error : new Error('Streaming failed')
      setState((s) => ({ ...s, isStreaming: false, error: e }))
      throw e
    }
  }, [])

  const cancel = useCallback(async () => {
    abortRef.current = true
    await RunAnywhere.cancelGeneration()
    setState((s) => ({ ...s, isStreaming: false }))
  }, [])

  return { ...state, stream, cancel }
}
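A minimal consumer of the hook might look like this (the component name and prompt are illustrative):

import React from 'react'
import { View, Text, Button } from 'react-native'
import { useStreamingGenerate } from './useStreamingGenerate'

export function StreamingDemo() {
  const { text, isStreaming, error, metrics, stream, cancel } = useStreamingGenerate()

  return (
    <View style={{ padding: 16 }}>
      <Button
        title={isStreaming ? 'Cancel' : 'Generate'}
        onPress={() => {
          if (isStreaming) {
            cancel()
          } else {
            // Errors are already captured in hook state, so swallow the rethrow here
            stream('Explain recursion briefly', { maxTokens: 200 }).catch(() => {})
          }
        }}
      />
      <Text style={{ marginTop: 16 }}>{text}</Text>
      {error ? <Text style={{ color: 'red' }}>{error.message}</Text> : null}
      {metrics ? <Text style={{ color: '#666' }}>{metrics.tokensUsed} tokens</Text> : null}
    </View>
  )
}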
With Typing Animation Effect
TypingEffect.tsx
import React, { useState, useEffect, useCallback } from 'react'
import { Text, View, Button } from 'react-native'
import { RunAnywhere } from '@runanywhere/core'

export function TypingEffect() {
  const [tokens, setTokens] = useState<string[]>([])
  const [displayedText, setDisplayedText] = useState('')
  const [isStreaming, setIsStreaming] = useState(false)

  // Animate tokens with a slight delay for effect
  useEffect(() => {
    if (tokens.length === 0) return

    let index = 0
    const interval = setInterval(() => {
      if (index < tokens.length) {
        setDisplayedText(tokens.slice(0, index + 1).join(''))
        index++
      } else {
        clearInterval(interval)
      }
    }, 20) // 20ms per token

    return () => clearInterval(interval)
  }, [tokens])

  const handleGenerate = useCallback(async () => {
    setTokens([])
    setDisplayedText('')
    setIsStreaming(true)

    const result = await RunAnywhere.generateStream('What makes a good programmer?', {
      maxTokens: 150,
    })

    const allTokens: string[] = []
    for await (const token of result.stream) {
      allTokens.push(token)
    }

    setTokens(allTokens)
    setIsStreaming(false)
  }, [])

  return (
    <View style={{ padding: 16 }}>
      <Button
        title={isStreaming ? 'Generating...' : 'Generate'}
        onPress={handleGenerate}
        disabled={isStreaming}
      />
      <Text style={{ marginTop: 16, fontSize: 16 }}>
        {displayedText}
        {isStreaming && '▊'}
      </Text>
    </View>
  )
}
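Because this example collects all tokens before starting the animation, nothing appears on screen until generation completes. For long outputs you may prefer to display tokens as they arrive (as in StreamingChat above) and reserve the replay animation for completed responses.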
Event-Based Streaming
You can also use the EventBus for streaming:
import { RunAnywhere } from '@runanywhere/core'

// Subscribe to generation events
const unsubscribe = RunAnywhere.events.onGeneration((event) => {
  switch (event.type) {
    case 'started':
      console.log('Generation started')
      break
    case 'tokenGenerated':
      process.stdout.write(event.token)
      break
    case 'completed':
      console.log('\nDone:', event.response.tokensUsed, 'tokens')
      break
    case 'failed':
      console.error('Error:', event.error)
      break
    case 'cancelled':
      console.log('Generation cancelled')
      break
  }
})

// Generate (tokens will be emitted via events)
await RunAnywhere.generate('Write a poem', { maxTokens: 100 })

// Clean up
unsubscribe()
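In a React component, subscribe inside an effect and return the unsubscribe function as cleanup. A sketch using only the event types shown above (the hook name is illustrative):

import { useEffect, useState } from 'react'
import { RunAnywhere } from '@runanywhere/core'

export function useGenerationText() {
  const [text, setText] = useState('')

  useEffect(() => {
    const unsubscribe = RunAnywhere.events.onGeneration((event) => {
      if (event.type === 'started') setText('')
      if (event.type === 'tokenGenerated') setText((t) => t + event.token)
    })
    // Unsubscribe when the component unmounts
    return unsubscribe
  }, [])

  return text
}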
Performance Tips
Streaming has minimal overhead compared to non-streaming generation. The time-to-first-token (TTFT) is the same, and total generation time is nearly identical.
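To check this on your own device, time the first token against the totals reported in the metrics. A rough sketch (Date.now() granularity is coarse, but good enough for a sanity check):

const start = Date.now()
const { stream, result } = await RunAnywhere.generateStream('Benchmark prompt', { maxTokens: 100 })

let firstTokenAt: number | null = null
for await (const _token of stream) {
  if (firstTokenAt === null) firstTokenAt = Date.now()
}

const final = await result
console.log('TTFT:', (firstTokenAt ?? Date.now()) - start, 'ms')
console.log('Total:', final.latencyMs, 'ms') // latencyMs as in the metrics example above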
Optimize UI Updates
For very fast generation, batch UI updates to avoid overwhelming React:
import { useCallback, useRef, useState } from 'react'

// Coalesce state updates to at most one per animation frame (~60fps)
function useThrottledText() {
  const [text, setText] = useState('')
  const pendingRef = useRef('')
  const frameRef = useRef<number>()

  const appendToken = useCallback((token: string) => {
    pendingRef.current += token
    if (!frameRef.current) {
      frameRef.current = requestAnimationFrame(() => {
        setText(pendingRef.current)
        frameRef.current = undefined
      })
    }
  }, [])

  const reset = useCallback(() => {
    pendingRef.current = ''
    setText('')
    if (frameRef.current) {
      cancelAnimationFrame(frameRef.current)
      frameRef.current = undefined
    }
  }, [])

  return { text, appendToken, reset }
}
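Wiring the hook into the streaming loop might look like this (the component and prompt are illustrative, and useThrottledText from the snippet above is assumed to be in scope):

import React, { useCallback } from 'react'
import { View, Text, Button } from 'react-native'
import { RunAnywhere } from '@runanywhere/core'

export function ThrottledStream() {
  const { text, appendToken, reset } = useThrottledText()

  const run = useCallback(async () => {
    reset()
    const { stream } = await RunAnywhere.generateStream('Explain event loops', { maxTokens: 300 })
    for await (const token of stream) {
      appendToken(token) // re-renders happen at most once per animation frame
    }
  }, [appendToken, reset])

  return (
    <View style={{ padding: 16 }}>
      <Button title="Run" onPress={run} />
      <Text style={{ marginTop: 16 }}>{text}</Text>
    </View>
  )
}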