Stream tokens in real-time for a responsive user experience. Ideal for chat interfaces where users expect to see text appear progressively.
// Kick off a streaming generation; tokens arrive as the model emits them.
final streaming = await RunAnywhere.generateStream(
  'Tell me a story about a robot',
  options: LLMGenerationOptions(maxTokens: 500),
);

// Render each chunk the moment it is produced.
await for (final chunk in streaming.stream) {
  stdout.write(chunk); // Real-time output
  // Or update UI: setState(() => _response += chunk);
}

// Once the stream closes, the final result carries the performance metrics.
final metrics = await streaming.result;
print('\n\nGenerated ${metrics.tokensUsed} tokens');
print('Speed: ${metrics.tokensPerSecond.toStringAsFixed(1)} tok/s');
LLMStreamingResult
| Property | Type | Description |
|---|---|---|
| `stream` | `Stream<String>` | Stream of tokens |
| `result` | `Future<LLMGenerationResult>` | Final result with metrics |
| `cancel` | `void Function()` | Cancel generation |
/// A chat widget that streams LLM tokens into the UI as they arrive.
class StreamingChatWidget extends StatefulWidget {
  /// Creates a streaming chat widget.
  ///
  /// Accepting a [key] lets callers control widget identity in lists;
  /// the const constructor allows cheap rebuilds.
  const StreamingChatWidget({super.key});

  @override
  // Return the public State type rather than leaking the private class
  // (avoids the `library_private_types_in_public_api` lint).
  State<StreamingChatWidget> createState() => _StreamingChatWidgetState();
}
class _StreamingChatWidgetState extends State<StreamingChatWidget> {
  // Accumulated response text rendered in the scroll view.
  String _response = '';
  // Whether a generation is in flight; drives the Generate/Cancel buttons.
  bool _isStreaming = false;
  // Handle to the active stream so the Cancel button (and dispose) can stop it.
  LLMStreamingResult? _currentStream;

  /// Starts streaming a response for [prompt], appending each token to
  /// [_response] as it arrives and logging final metrics when done.
  Future<void> _startStreaming(String prompt) async {
    setState(() {
      _response = '';
      _isStreaming = true;
    });
    try {
      _currentStream = await RunAnywhere.generateStream(
        prompt,
        options: LLMGenerationOptions(maxTokens: 300),
      );
      await for (final token in _currentStream!.stream) {
        // Guard: the widget may be disposed while tokens are still arriving;
        // calling setState after dispose throws. Bail out instead.
        if (!mounted) return;
        setState(() => _response += token);
      }
      final metrics = await _currentStream!.result;
      print('Generated at ${metrics.tokensPerSecond.toStringAsFixed(1)} tok/s');
    } finally {
      // Same post-await guard applies here (finally also runs after the
      // early return above).
      if (mounted) {
        setState(() => _isStreaming = false);
      }
    }
  }

  /// Cancels the in-flight generation, if any.
  void _cancelStreaming() {
    _currentStream?.cancel();
  }

  @override
  void dispose() {
    // Stop any in-flight generation so work isn't leaked after the widget
    // leaves the tree.
    _currentStream?.cancel();
    super.dispose();
  }

  @override
  Widget build(BuildContext context) {
    return Column(
      children: [
        Expanded(
          child: SingleChildScrollView(
            child: Text(_response),
          ),
        ),
        Row(
          children: [
            ElevatedButton(
              // Disable Generate while a stream is active.
              onPressed: _isStreaming ? null : () => _startStreaming('Tell me a joke'),
              child: const Text('Generate'),
            ),
            // Only offer Cancel while streaming.
            if (_isStreaming)
              TextButton(
                onPressed: _cancelStreaming,
                child: const Text('Cancel'),
              ),
          ],
        ),
      ],
    );
  }
}
Cancellation
Cancel ongoing generation at any time:
// Begin a long-running generation.
final streamResult = await RunAnywhere.generateStream('Long story...');

// Schedule a cancellation 5 seconds from now.
Future.delayed(Duration(seconds: 5), () {
  streamResult.cancel();
  print('Generation cancelled');
});

// Alternatively, cancel through the static method.
await RunAnywhere.cancelGeneration();
Best Practices
- Use streaming for chat interfaces — Users perceive the app as more responsive when they see tokens appear progressively, even if total generation time is the same.
- Update UI incrementally — Append tokens to your state as they arrive
- Show a cancel button — Let users stop long generations
- Handle cancellation gracefully — The stream will complete when cancelled
- Get final metrics — Always await `result` for accurate performance data
See Also