Skip to main content

Overview

Token streaming allows you to display AI responses as they’re generated, token by token. This provides a much better user experience than waiting for the entire response, especially for longer outputs.

Basic Usage

import { RunAnywhere } from '@runanywhere/core'

// Start a streaming generation; tokens become available incrementally.
const streaming = await RunAnywhere.generateStream('Write a short story about a robot', {
  maxTokens: 200,
})

// Print each token the moment it is produced
for await (const chunk of streaming.stream) {
  process.stdout.write(chunk)
}

// After the stream is consumed, await the final result for metrics
const completed = await streaming.result
console.log('\nSpeed:', completed.performanceMetrics.tokensPerSecond, 'tok/s')

API Reference

await RunAnywhere.generateStream(
  prompt: string,
  options?: GenerationOptions
): Promise<LLMStreamingResult>

Parameters

Same as generate() — see Generation Options.

Returns

/**
 * Returned by generateStream(): a live token stream plus a promise for the
 * final aggregated result. Consume `stream` first, then await `result`.
 */
interface LLMStreamingResult {
  /** Async iterator yielding tokens one at a time, in generation order */
  stream: AsyncIterable<string>

  /** Promise resolving to final GenerationResult with metrics */
  result: Promise<GenerationResult>
}

Examples

React Native Component

StreamingChat.tsx
import React, { useState, useCallback } from 'react'
import { View, Text, Button, ScrollView } from 'react-native'
import { RunAnywhere } from '@runanywhere/core'

/**
 * Demo component: streams a response token-by-token into a <Text>,
 * then shows throughput/latency metrics once generation completes.
 */
export function StreamingChat() {
  const [response, setResponse] = useState('')
  const [isStreaming, setIsStreaming] = useState(false)
  const [metrics, setMetrics] = useState<string>('')

  const handleStream = useCallback(async () => {
    // Reset UI state before starting a new generation
    setResponse('')
    setMetrics('')
    setIsStreaming(true)

    try {
      const streamResult = await RunAnywhere.generateStream('Explain how neural networks work', {
        maxTokens: 300,
        temperature: 0.7,
      })

      // Accumulate tokens and re-render with the growing text
      let fullText = ''
      for await (const token of streamResult.stream) {
        fullText += token
        setResponse(fullText)
      }

      // Show final metrics once the stream has been fully consumed
      const final = await streamResult.result
      setMetrics(
        `${final.performanceMetrics.tokensPerSecond?.toFixed(1)} tok/s | ` +
          `${final.latencyMs}ms | ${final.tokensUsed} tokens`
      )
    } catch (error) {
      setResponse('Error: ' + (error as Error).message)
    } finally {
      setIsStreaming(false)
    }
  }, [])

  return (
    <View style={{ flex: 1, padding: 16 }}>
      <Button
        title={isStreaming ? 'Streaming...' : 'Start Streaming'}
        onPress={handleStream}
        disabled={isStreaming}
      />

      <ScrollView style={{ flex: 1, marginTop: 16 }}>
        <Text style={{ fontSize: 16, lineHeight: 24 }}>
          {response}
          {/* Cursor glyph while tokens are still arriving (was an empty <Text>) */}
          {isStreaming && <Text style={{ opacity: 0.5 }}>▊</Text>}
        </Text>
      </ScrollView>

      {/* Guard with !== '' — `metrics && …` would render a bare '' inside the
          View, which React Native rejects ("Text strings must be rendered
          within a <Text> component") */}
      {metrics !== '' && (
        <Text style={{ marginTop: 8, color: '#666', fontSize: 12 }}>{metrics}</Text>
      )}
    </View>
  )
}

Custom Streaming Hook

useStreamingGenerate.ts
import { useState, useCallback, useRef } from 'react'
import { RunAnywhere, GenerationOptions, GenerationResult } from '@runanywhere/core'

/** Snapshot of a streaming generation, exposed by useStreamingGenerate. */
interface StreamingState {
  // Text accumulated from tokens received so far
  text: string
  // True from stream() start until completion, error, or cancel
  isStreaming: boolean
  // Set when generation throws; null otherwise
  error: Error | null
  // Final GenerationResult once the stream completes; null while running
  metrics: GenerationResult | null
}

/**
 * React hook wrapping RunAnywhere.generateStream: exposes the accumulated
 * text, streaming status, last error, and final metrics as one state
 * object, plus stream() / cancel() actions.
 */
export function useStreamingGenerate() {
  const [state, setState] = useState<StreamingState>({
    text: '',
    isStreaming: false,
    error: null,
    metrics: null,
  })

  // Flipped to true by cancel(); checked between tokens to stop consuming.
  const abortRef = useRef(false)

  const stream = useCallback(async (prompt: string, options?: GenerationOptions) => {
    abortRef.current = false
    setState({ text: '', isStreaming: true, error: null, metrics: null })

    try {
      const streaming = await RunAnywhere.generateStream(prompt, options)

      let buffer = ''
      for await (const piece of streaming.stream) {
        if (abortRef.current) break
        buffer += piece
        setState((prev) => ({ ...prev, text: buffer }))
      }

      // Resolve the final result (with metrics) after the loop exits
      const finalResult = await streaming.result
      setState((prev) => ({ ...prev, isStreaming: false, metrics: finalResult }))
      return finalResult
    } catch (err) {
      // Normalize non-Error throwables before storing/rethrowing
      const wrapped = err instanceof Error ? err : new Error('Streaming failed')
      setState((prev) => ({ ...prev, isStreaming: false, error: wrapped }))
      throw wrapped
    }
  }, [])

  const cancel = useCallback(async () => {
    abortRef.current = true
    await RunAnywhere.cancelGeneration()
    setState((prev) => ({ ...prev, isStreaming: false }))
  }, [])

  return { ...state, stream, cancel }
}

With Typing Animation Effect

TypingEffect.tsx
import React, { useState, useEffect, useCallback } from 'react'
import { Text, View, Button } from 'react-native'
import { RunAnywhere } from '@runanywhere/core'

/**
 * Demo: consumes the whole token stream first, then replays the tokens
 * with a fixed 20ms interval to create a typing-animation effect.
 */
export function TypingEffect() {
  const [tokens, setTokens] = useState<string[]>([])
  const [displayedText, setDisplayedText] = useState('')
  const [isStreaming, setIsStreaming] = useState(false)

  // Animate tokens with a slight delay for effect
  useEffect(() => {
    if (tokens.length === 0) return

    let index = 0
    const interval = setInterval(() => {
      if (index < tokens.length) {
        setDisplayedText(tokens.slice(0, index + 1).join(''))
        index++
      } else {
        clearInterval(interval)
      }
    }, 20) // 20ms per token

    // Stop the timer if tokens change or the component unmounts
    return () => clearInterval(interval)
  }, [tokens])

  const handleGenerate = useCallback(async () => {
    setTokens([])
    setDisplayedText('')
    setIsStreaming(true)

    try {
      const result = await RunAnywhere.generateStream('What makes a good programmer?', {
        maxTokens: 150,
      })

      // Collect every token before starting the replay animation
      const allTokens: string[] = []
      for await (const token of result.stream) {
        allTokens.push(token)
      }

      setTokens(allTokens)
    } catch (error) {
      // Surface the failure instead of leaving the screen blank
      setDisplayedText('Error: ' + (error as Error).message)
    } finally {
      // Always re-enable the button — without this, a rejected
      // generateStream() left isStreaming stuck at true forever
      setIsStreaming(false)
    }
  }, [])

  return (
    <View style={{ padding: 16 }}>
      <Button
        title={isStreaming ? 'Generating...' : 'Generate'}
        onPress={handleGenerate}
        disabled={isStreaming}
      />
      <Text style={{ marginTop: 16, fontSize: 16 }}>
        {displayedText}
        {isStreaming && '▊'}
      </Text>
    </View>
  )
}

Event-Based Streaming

You can also subscribe to generation events (via the event bus) for streaming:
// NOTE: EventBus was previously imported here but never used — the
// subscription goes through RunAnywhere.events.
import { RunAnywhere } from '@runanywhere/core'

// Subscribe to generation lifecycle events (including per-token events)
const unsubscribe = RunAnywhere.events.onGeneration((event) => {
  switch (event.type) {
    case 'started':
      console.log('Generation started')
      break
    case 'tokenGenerated':
      process.stdout.write(event.token)
      break
    case 'completed':
      console.log('\nDone:', event.response.tokensUsed, 'tokens')
      break
    case 'failed':
      console.error('Error:', event.error)
      break
    case 'cancelled':
      console.log('Generation cancelled')
      break
  }
})

// Generate (tokens will be emitted via events)
await RunAnywhere.generate('Write a poem', { maxTokens: 100 })

// Clean up
unsubscribe()

Performance Tips

Streaming has minimal overhead compared to non-streaming generation. The time-to-first-token (TTFT) is the same, and total generation time is nearly identical.

Optimize UI Updates

For very fast generation, batch UI updates to avoid overwhelming React:
import { useCallback, useRef, useState } from 'react'

/**
 * Batches incoming tokens and flushes them to React state at most once
 * per animation frame, so rapid token arrival does not trigger one
 * re-render per token.
 *
 * NOTE(review): `delay` is accepted for API compatibility but is not
 * referenced in this body — flushing is frame-driven (~60fps).
 */
function useThrottledText(delay = 16) {
  const [text, setText] = useState('')
  // Full accumulated text, including tokens not yet flushed to state
  const pendingRef = useRef('')
  // Handle of the scheduled flush frame; undefined when none is pending
  const frameRef = useRef<number>()

  const appendToken = useCallback((token: string) => {
    pendingRef.current = pendingRef.current + token

    // One flush per frame; tokens arriving meanwhile piggyback on it
    if (frameRef.current) return
    frameRef.current = requestAnimationFrame(() => {
      setText(pendingRef.current)
      frameRef.current = undefined
    })
  }, [])

  const reset = useCallback(() => {
    pendingRef.current = ''
    setText('')
    // Drop any flush still scheduled for the discarded text
    if (frameRef.current) {
      cancelAnimationFrame(frameRef.current)
      frameRef.current = undefined
    }
  }, [])

  return { text, appendToken, reset }
}