> ## Documentation Index
> Fetch the complete documentation index at: https://docs.runanywhere.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Streaming STT

> Real-time audio transcription

## Overview

Real-time STT streaming allows you to transcribe audio as it's being recorded, providing immediate feedback to users. This is essential for voice interfaces where low latency is critical.

## Basic Usage

```typescript theme={null}
import { RunAnywhere } from '@runanywhere/core'

// Start streaming transcription; results are delivered through the callbacks below
const session = await RunAnywhere.startSTTStream({
  language: 'en',
  // Partial results
  onPartialResult: (partial) => {
    console.log('Partial:', partial.text)
  },
  // Final segment results
  onFinalResult: (final) => {
    console.log('Final:', final.text)
  },
})

// Feed audio chunks as they arrive (`audioChunk` is a placeholder for your captured audio)
session.feedAudio(audioChunk) // Float32 samples, passed as number[]

// When done, stop the session and read the result it resolves with
const result = await session.stop()
console.log('Complete transcription:', result.text)
```

## API Reference

### `startSTTStream`

Start a streaming transcription session.

```typescript theme={null}
await RunAnywhere.startSTTStream(
  config: STTStreamConfig
): Promise<STTStreamSession>
```

### Configuration

```typescript theme={null}
/** Options accepted by `RunAnywhere.startSTTStream`; every field is optional. */
interface STTStreamConfig {
  /** Language code (e.g., 'en', 'es') */
  language?: string

  /** Sample rate of incoming audio (default: 16000) */
  sampleRate?: number

  /** Callback for partial results; receives an `STTPartialResult` */
  onPartialResult?: (result: STTPartialResult) => void

  /** Callback for final segment results; with VAD enabled it is called when end of speech is detected */
  onFinalResult?: (result: STTResult) => void

  /** Callback for errors */
  onError?: (error: Error) => void

  /** Enable Voice Activity Detection so the stream detects speech segments automatically */
  enableVAD?: boolean
}
```

### Session Methods

```typescript theme={null}
/** Handle for a streaming transcription session, resolved by `RunAnywhere.startSTTStream`. */
interface STTStreamSession {
  /**
   * Feed audio samples to the stream.
   * NOTE(review): the examples in this guide pass 16-bit PCM scaled by 1/32768; confirm the
   * expected sample range against the SDK.
   */
  feedAudio(samples: number[]): void

  /** Pause transcription */
  pause(): void

  /** Resume transcription */
  resume(): void

  /** Stop and get final result; the promise resolves with an `STTResult` */
  stop(): Promise<STTResult>

  /** Check if session is active */
  isActive: boolean
}
```

## Examples

### Real-Time Microphone Transcription

```tsx LiveTranscription.tsx theme={null}
import React, { useState, useCallback, useRef, useEffect } from 'react'
import { View, Button, Text } from 'react-native'
import { RunAnywhere, STTStreamSession } from '@runanywhere/core'
import AudioRecord from 'react-native-audio-record'

/**
 * Start/stop button with a live transcript underneath.
 *
 * Microphone audio captured by `react-native-audio-record` is decoded with
 * `base64ToFloat32` and fed into a RunAnywhere STT stream. Finalised segments are
 * appended to `transcript`; the in-progress text is rendered in grey after it.
 */
export function LiveTranscription() {
  const [isListening, setIsListening] = useState(false)
  const [transcript, setTranscript] = useState('')
  const [partial, setPartial] = useState('')
  // Holds the active session; null whenever no stream should receive audio.
  const sessionRef = useRef<STTStreamSession | null>(null)

  useEffect(() => {
    // Initialize audio recording
    AudioRecord.init({
      sampleRate: 16000,
      channels: 1,
      bitsPerSample: 16,
      audioSource: 6,
    })

    return () => {
      // Unmounting mid-recording: release both the recorder and the stream.
      const session = sessionRef.current
      sessionRef.current = null
      if (session?.isActive) {
        AudioRecord.stop().catch(() => undefined)
        // Nothing is left to render the result or the error into.
        session.stop().catch(() => undefined)
      }
    }
  }, [])

  const startListening = useCallback(async () => {
    setIsListening(true)
    setTranscript('')
    setPartial('')

    try {
      // Start STT stream
      sessionRef.current = await RunAnywhere.startSTTStream({
        language: 'en',
        enableVAD: true,
        onPartialResult: (result) => {
          setPartial(result.text)
        },
        onFinalResult: (result) => {
          setTranscript((prev) => prev + result.text + ' ')
          setPartial('')
        },
      })

      // Subscribe before starting capture so the first chunk is not missed.
      AudioRecord.on('data', (data: string) => {
        // Convert base64 to float32 samples
        const samples = base64ToFloat32(data)
        sessionRef.current?.feedAudio(samples)
      })
      AudioRecord.start()
    } catch (error) {
      console.error('Failed to start transcription:', error)

      // Do not leave a half-started session behind or the button stuck on "Stop".
      const session = sessionRef.current
      sessionRef.current = null
      await session?.stop().catch(() => undefined)
      setIsListening(false)
    }
  }, [])

  const stopListening = useCallback(async () => {
    setIsListening(false)
    AudioRecord.stop()

    // Clear the ref first so late 'data' events cannot feed a stopped session.
    const session = sessionRef.current
    sessionRef.current = null
    if (!session) {
      return
    }

    try {
      const finalResult = await session.stop()
      // NOTE(review): if stop() resolves with the complete transcription rather than only
      // the trailing segment, this should replace the transcript instead of appending - confirm.
      setTranscript((prev) => prev + finalResult.text)
    } catch (error) {
      console.error('Failed to stop transcription:', error)
    } finally {
      setPartial('')
    }
  }, [])

  return (
    <View style={{ padding: 16 }}>
      <Button
        title={isListening ? '🔴 Stop' : '🎤 Start'}
        onPress={isListening ? stopListening : startListening}
      />
      <View style={{ marginTop: 16 }}>
        <Text style={{ fontSize: 16 }}>
          {transcript}
          <Text style={{ color: '#888' }}>{partial}</Text>
        </Text>
      </View>
    </View>
  )
}

/**
 * Decodes base64-encoded 16-bit little-endian PCM into normalized samples.
 *
 * @param base64 - Base64 string whose decoded bytes are consecutive int16 samples
 *   (low byte first). A trailing unpaired byte is ignored.
 * @returns One number per sample, scaled by 1/32768 into the range [-1, 1).
 */
function base64ToFloat32(base64: string): number[] {
  const decoded = atob(base64)
  const sampleCount = Math.floor(decoded.length / 2)
  const samples: number[] = new Array(sampleCount)

  for (let n = 0, offset = 0; n < sampleCount; n++, offset += 2) {
    const low = decoded.charCodeAt(offset)
    const high = decoded.charCodeAt(offset + 1)
    const unsigned = (high << 8) | low
    // Reinterpret the 16-bit word as a signed value (two's complement).
    const signed = unsigned >= 0x8000 ? unsigned - 0x10000 : unsigned
    samples[n] = signed / 32768.0
  }

  return samples
}
```

### With VAD Integration

When VAD is enabled, the stream automatically detects speech segments:

```typescript theme={null}
// `updateUI` and `audioSamples` are placeholders for your own rendering function and captured audio
const session = await RunAnywhere.startSTTStream({
  language: 'en',
  enableVAD: true,
  onPartialResult: (result) => {
    // Updates while user is speaking
    updateUI(result.text)
  },
  onFinalResult: (result) => {
    // Called when VAD detects end of speech
    console.log('User finished speaking:', result.text)
  },
})

// Keep feeding audio as usual; VAD handles segmentation automatically
session.feedAudio(audioSamples)
```

### Custom Hook

```typescript useSTTStream.ts theme={null}
import { useState, useRef, useCallback } from 'react'
import { RunAnywhere, STTStreamSession, STTResult } from '@runanywhere/core'

/**
 * React hook wrapping a RunAnywhere streaming STT session.
 *
 * @returns
 * - `isStreaming` - true between a `start()` call and `stop()` (or a failed start)
 * - `partial` - latest partial text; cleared whenever a segment is finalised
 * - `transcript` - finalised segments joined with spaces
 * - `start(language?)` - opens a VAD-enabled stream; rejects if the stream cannot be started
 * - `feedAudio(samples)` - forwards samples to the active session; no-op when there is none
 * - `stop()` - stops the session and resolves with its result, or `null` if none was active
 * - `reset()` - clears `transcript` and `partial`
 */
export function useSTTStream() {
  const [isStreaming, setIsStreaming] = useState(false)
  const [partial, setPartial] = useState('')
  const [transcript, setTranscript] = useState('')
  // Holds the active session; null whenever no stream should receive audio.
  const sessionRef = useRef<STTStreamSession | null>(null)

  const start = useCallback(async (language = 'en') => {
    setIsStreaming(true)
    setPartial('')

    try {
      sessionRef.current = await RunAnywhere.startSTTStream({
        language,
        enableVAD: true,
        onPartialResult: (result) => setPartial(result.text),
        onFinalResult: (result) => {
          setTranscript((prev) => prev + result.text + ' ')
          setPartial('')
        },
      })
    } catch (error) {
      // Do not report a stream that never started; let the caller see the failure.
      setIsStreaming(false)
      throw error
    }
  }, [])

  const feedAudio = useCallback((samples: number[]) => {
    sessionRef.current?.feedAudio(samples)
  }, [])

  const stop = useCallback(async (): Promise<STTResult | null> => {
    setIsStreaming(false)

    // Clear the ref first so later feedAudio calls cannot reach a stopped session.
    const session = sessionRef.current
    sessionRef.current = null
    if (!session) {
      return null
    }

    try {
      const result = await session.stop()
      // NOTE(review): if stop() resolves with the complete transcription rather than only
      // the trailing segment, this should replace the transcript instead of appending - confirm.
      setTranscript((prev) => prev + result.text)
      return result
    } finally {
      // Clear the in-progress text even when stop() rejects.
      setPartial('')
    }
  }, [])

  const reset = useCallback(() => {
    setTranscript('')
    setPartial('')
  }, [])

  return {
    isStreaming,
    partial,
    transcript,
    start,
    feedAudio,
    stop,
    reset,
  }
}
```

## Performance Considerations

<Tip>
  Feed audio in chunks of 100-500ms for an optimal balance between latency and accuracy. Chunks
  that are too small increase overhead; chunks that are too large increase perceived latency.
</Tip>

### Optimal Chunk Size

```typescript theme={null}
// Recommended chunk sizes for 16kHz audio
const CHUNK_DURATION_MS = 200 // duration of one chunk in milliseconds
const SAMPLE_RATE = 16000 // samples per second
// samples per chunk = samples per second * chunk duration in seconds
const CHUNK_SIZE = (SAMPLE_RATE * CHUNK_DURATION_MS) / 1000 // 3200 samples
```

### Buffer Management

```typescript theme={null}
/**
 * Buffers incoming audio and re-emits it in fixed-size chunks.
 *
 * Samples passed to `push` accumulate until at least `chunkSize` are available; each full
 * chunk is then handed to `onChunkReady` in arrival order. `flush` emits whatever is left.
 */
class AudioBuffer {
  private buffer: number[] = []
  private readonly chunkSize: number
  private readonly onChunkReady: (chunk: number[]) => void

  /**
   * @param chunkSize - Samples per emitted chunk; fractional values are rounded down.
   * @param onChunkReady - Called synchronously with each completed chunk.
   * @throws RangeError if `chunkSize` rounds down to less than 1 (or is NaN), which would
   *   otherwise make `push` loop forever emitting empty chunks.
   */
  constructor(chunkSize: number, onChunkReady: (chunk: number[]) => void) {
    const size = Math.floor(chunkSize)
    if (!(size >= 1)) {
      throw new RangeError(`chunkSize must be at least 1, received ${chunkSize}`)
    }
    this.chunkSize = size
    this.onChunkReady = onChunkReady
  }

  /** Appends `samples` and emits every full chunk that is now available. */
  push(samples: number[]) {
    // Append one by one: `push(...samples)` passes each sample as a call argument and
    // throws a RangeError once the array outgrows the engine's argument limit.
    for (const sample of samples) {
      this.buffer.push(sample)
    }

    while (this.buffer.length >= this.chunkSize) {
      const chunk = this.buffer.splice(0, this.chunkSize)
      this.onChunkReady(chunk)
    }
  }

  /** Emits any remaining samples as one final, possibly short, chunk. */
  flush() {
    if (this.buffer.length > 0) {
      // Detach before the callback so a throwing or re-entrant callback cannot see
      // (or re-emit) the same samples.
      const remainder = this.buffer
      this.buffer = []
      this.onChunkReady(remainder)
    }
  }
}
```

## Error Handling

```typescript theme={null}
// Register onError to be notified of stream errors (`samples` below is a placeholder for captured audio)
const session = await RunAnywhere.startSTTStream({
  language: 'en',
  onError: (error) => {
    console.error('STT stream error:', error.message)
    // Handle gracefully - maybe restart the stream
  },
})

// Wrap feedAudio in try-catch for safety
// NOTE(review): whether feedAudio can throw synchronously is not shown in this guide - confirm against the SDK
try {
  session.feedAudio(samples)
} catch (error) {
  console.error('Failed to feed audio:', error)
}
```

## Related

<CardGroup cols={2}>
  <Card title="Transcribe" icon="file-audio" href="/react-native/stt/transcribe">
    Batch transcription
  </Card>

  <Card title="STT Options" icon="sliders" href="/react-native/stt/options">
    Configuration options
  </Card>

  <Card title="VAD" icon="waveform-lines" href="/react-native/vad">
    Voice Activity Detection
  </Card>

  <Card title="Voice Agent" icon="robot" href="/react-native/voice-agent">
    Full voice pipeline
  </Card>
</CardGroup>
