Follow these best practices to build high-quality AI-powered Flutter apps.
Reference Implementation: The Flutter Starter Example demonstrates all these best practices in a production-ready app.
## Model Selection

Choose the right model size for your use case:
| Model Size | RAM Required | Use Case | Example |
|---|---|---|---|
| 360M–500M (Q8) | ~500MB | Fast chat, commands | SmolLM2-360M |
| 1B–3B (Q4/Q6) | 1–2GB | Balanced quality/speed | Qwen2.5-1.5B |
| 7B (Q4) | 4–5GB | High quality | Qwen2.5-7B |
Start small — use the smallest model that meets your quality needs. You can always upgrade later.
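If you want the app to adapt to the device automatically, one approach is tiered fallback: try the preferred model first and step down to a smaller one on failure. A minimal sketch, assuming `loadModel` throws an `SDKError` when a model cannot be loaded; the `qwen2.5-1.5b-q6` ID is illustrative, so confirm real IDs via `RunAnywhere.availableModels()`:

```dart
/// Sketch: try preferred models from largest to smallest.
Future<String?> loadBestAvailableModel() async {
  const candidates = [
    'qwen2.5-1.5b-q6', // balanced quality/speed (hypothetical ID)
    'smollm2-360m-q8_0', // small, fast fallback
  ];
  for (final id in candidates) {
    try {
      await RunAnywhere.loadModel(id);
      return id; // loaded successfully
    } on SDKError {
      continue; // assumed failure mode: try the next smaller model
    }
  }
  return null; // nothing could be loaded
}
```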
## Memory Management

### Unload Unused Models

```dart
// Unload when not in use
await RunAnywhere.unloadModel();
await RunAnywhere.unloadSTTModel();
await RunAnywhere.unloadTTSVoice();
```
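A practical trigger for these calls is a system memory warning. The sketch below uses Flutter's `WidgetsBindingObserver.didHaveMemoryPressure`; `ChatPage` is a hypothetical widget, and which components count as "inactive" is an app-specific choice:

```dart
class ChatPage extends StatefulWidget {
  @override
  _ChatPageState createState() => _ChatPageState();
}

class _ChatPageState extends State<ChatPage> with WidgetsBindingObserver {
  @override
  void initState() {
    super.initState();
    WidgetsBinding.instance.addObserver(this);
  }

  @override
  void didHaveMemoryPressure() {
    // The OS is running low on memory: release inactive components first.
    RunAnywhere.unloadSTTModel();
    RunAnywhere.unloadTTSVoice();
  }

  @override
  void dispose() {
    WidgetsBinding.instance.removeObserver(this);
    super.dispose();
  }

  @override
  Widget build(BuildContext context) => const SizedBox(); // UI omitted
}
```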
### Check Available Memory

```dart
final storageInfo = await RunAnywhere.getStorageInfo();
final freeSpace = storageInfo.deviceStorage.freeSpace;

// Ensure enough space before downloading (2x leaves a safety margin)
final models = await RunAnywhere.availableModels();
final model = models.firstWhere((m) => m.id == 'my-model');
if (model.downloadSize != null && model.downloadSize! * 2 > freeSpace) {
  showError('Not enough storage. '
      'Need ${(model.downloadSize! * 2 / 1024 / 1024).toStringAsFixed(0)} MB.');
}
```
### Clean Up Old Models

```dart
// Delete unused models
await RunAnywhere.deleteStoredModel('old-model-id');

// Get storage usage
final models = await RunAnywhere.getDownloadedModelsWithInfo();
for (final model in models) {
  print('${model.id}: ${(model.size / 1024 / 1024).toStringAsFixed(1)} MB');
}
```
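Putting the two calls together, here is a hedged sketch that deletes everything except the model currently in use; it assumes the `id` reported by `getDownloadedModelsWithInfo` is the same ID that `deleteStoredModel` expects:

```dart
/// Sketch: delete every downloaded model except [activeModelId].
Future<void> cleanUpExcept(String activeModelId) async {
  final models = await RunAnywhere.getDownloadedModelsWithInfo();
  for (final model in models) {
    if (model.id != activeModelId) {
      await RunAnywhere.deleteStoredModel(model.id);
    }
  }
}
```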
## Streaming for Better UX

Always prefer streaming for chat interfaces:
```dart
// ✅ Good: streaming shows progress as tokens arrive
final result = await RunAnywhere.generateStream(prompt);
await for (final token in result.stream) {
  setState(() => _response += token);
}

// ❌ Avoid: the user waits with no feedback
final response = await RunAnywhere.generate(prompt);
setState(() => _response = response.text);
```
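Streams can also fail mid-generation (for example, if the model gets unloaded). A small sketch that keeps the partial output and surfaces a friendly error; `_response` and `_error` are State fields as in the examples above:

```dart
Future<void> streamResponse(String prompt) async {
  final buffer = StringBuffer();
  try {
    final result = await RunAnywhere.generateStream(prompt);
    await for (final token in result.stream) {
      buffer.write(token);
      setState(() => _response = buffer.toString());
    }
  } catch (e) {
    // Keep whatever was generated and show a friendly message.
    setState(() => _error = 'Generation was interrupted. Please try again.');
  }
}
```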
## Preload Models

Load models during idle time, not when user needs them:
```dart
class MyApp extends StatefulWidget {
  @override
  _MyAppState createState() => _MyAppState();
}

class _MyAppState extends State<MyApp> {
  @override
  void initState() {
    super.initState();
    _preloadModels(); // Load in background
  }

  Future<void> _preloadModels() async {
    // Download if needed
    final models = await RunAnywhere.availableModels();
    final llm = models.firstWhere((m) => m.id == 'smollm2-360m-q8_0');
    if (!llm.isDownloaded) {
      await for (final p in RunAnywhere.downloadModel(llm.id)) {
        if (p.state.isCompleted) break;
      }
    }

    // Preload into memory
    await RunAnywhere.loadModel(llm.id);
  }

  @override
  Widget build(BuildContext context) => const SizedBox(); // your app UI
}
```
## Error Handling

Always handle errors gracefully:
```dart
Future<void> generateResponse(String prompt) async {
  try {
    final result = await RunAnywhere.generate(prompt);
    setState(() => _response = result.text);
  } on SDKError catch (e) {
    setState(() => _error = _getUserFriendlyMessage(e));
  } catch (e) {
    setState(() => _error = 'Something went wrong. Please try again.');
  }
}

String _getUserFriendlyMessage(SDKError error) {
  // Convert technical errors to user-friendly messages
  switch (error.code) {
    case SDKErrorCode.componentNotReady:
      return 'AI is loading. Please wait...';
    case SDKErrorCode.generationFailed:
      return 'Could not generate response. Try again.';
    default:
      return 'An error occurred.';
  }
}
```
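For errors that are plausibly transient, a bounded retry with exponential backoff often helps. A sketch under two assumptions: `generate` returns a `GenerationResult`, and `generationFailed` / `componentNotReady` are worth retrying in your app:

```dart
Future<GenerationResult> generateWithRetry(
  String prompt, {
  int maxAttempts = 3,
}) async {
  var delay = const Duration(milliseconds: 500);
  for (var attempt = 1; ; attempt++) {
    try {
      return await RunAnywhere.generate(prompt);
    } on SDKError catch (e) {
      // Only retry errors that are plausibly transient (an assumption;
      // verify against your actual error codes).
      final retryable = e.code == SDKErrorCode.generationFailed ||
          e.code == SDKErrorCode.componentNotReady;
      if (!retryable || attempt >= maxAttempts) rethrow;
      await Future.delayed(delay);
      delay *= 2; // exponential backoff: 500ms, 1s, 2s, ...
    }
  }
}
```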
## Test on Physical Devices

Always test on real devices — emulators are significantly slower for AI inference, so the performance you see in an emulator is not representative of real-world usage:

- iOS Simulator: 5-10x slower than a physical device
- Android Emulator: 3-5x slower than a physical device
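To catch this at runtime, the device_info_plus package exposes an `isPhysicalDevice` flag on both platforms. A minimal sketch you could use to show a warning banner during testing:

```dart
import 'dart:io';

import 'package:device_info_plus/device_info_plus.dart';

/// Returns true when running on an emulator or simulator.
Future<bool> isEmulator() async {
  final deviceInfo = DeviceInfoPlugin();
  if (Platform.isAndroid) {
    return !(await deviceInfo.androidInfo).isPhysicalDevice;
  }
  if (Platform.isIOS) {
    return !(await deviceInfo.iosInfo).isPhysicalDevice;
  }
  return false;
}
```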
## Download Management

### Show Progress

```dart
// Create the download stream once (e.g., in initState); passing
// RunAnywhere.downloadModel() directly to a StreamBuilder inside build()
// would restart the download on every rebuild.
Widget _buildDownloadProgress(Stream<DownloadProgress> downloadStream) {
  return StreamBuilder<DownloadProgress>(
    stream: downloadStream,
    builder: (context, snapshot) {
      if (!snapshot.hasData) return const CircularProgressIndicator();
      final progress = snapshot.data!;
      return Column(
        children: [
          LinearProgressIndicator(value: progress.percentage),
          Text('${(progress.percentage * 100).toStringAsFixed(0)}%'),
          Text('${(progress.bytesDownloaded / 1024 / 1024).toStringAsFixed(1)} MB'),
        ],
      );
    },
  );
}
```
### Download on WiFi

```dart
import 'package:connectivity_plus/connectivity_plus.dart';
import 'package:flutter/material.dart';

Future<bool> isOnWifi() async {
  // connectivity_plus 6.x returns a List<ConnectivityResult>;
  // on older versions, compare against a single ConnectivityResult instead.
  final results = await Connectivity().checkConnectivity();
  return results.contains(ConnectivityResult.wifi);
}

Future<void> downloadWithCheck(BuildContext context, String modelId) async {
  if (!await isOnWifi()) {
    final proceed = await showDialog<bool>(
      context: context,
      builder: (ctx) => AlertDialog(
        title: const Text('Download on Cellular?'),
        content: const Text('This model is large. Download on WiFi to save data.'),
        actions: [
          TextButton(
            onPressed: () => Navigator.pop(ctx, false),
            child: const Text('Wait for WiFi'),
          ),
          TextButton(
            onPressed: () => Navigator.pop(ctx, true),
            child: const Text('Download Now'),
          ),
        ],
      ),
    );
    if (proceed != true) return;
  }

  await for (final p in RunAnywhere.downloadModel(modelId)) {
    if (p.state.isCompleted) break;
  }
}
```
## Voice Assistant Tips

### Optimize Latency

```dart
// Load all components during initialization so the first interaction is fast
await Future.wait([
  RunAnywhere.loadSTTModel('whisper-tiny-en'),
  RunAnywhere.loadModel('smollm2-360m-q8_0'),
  RunAnywhere.loadTTSVoice('piper-en-us-amy'),
]);
```
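Note that `Future.wait` completes with the first error it encounters, so one failed component (say, the TTS voice) would abort the whole warm-up. A sketch that loads each component independently and degrades gracefully; `_tryLoad` is a local helper, not an SDK call:

```dart
Future<bool> _tryLoad(Future<void> Function() load, String name) async {
  try {
    await load();
    return true;
  } catch (e) {
    print('Failed to load $name: $e');
    return false;
  }
}

Future<void> warmUpVoicePipeline() async {
  final ok = await Future.wait([
    _tryLoad(() => RunAnywhere.loadSTTModel('whisper-tiny-en'), 'STT'),
    _tryLoad(() => RunAnywhere.loadModel('smollm2-360m-q8_0'), 'LLM'),
    _tryLoad(() => RunAnywhere.loadTTSVoice('piper-en-us-amy'), 'TTS'),
  ]);
  // ok[i] tells you which components are available, so the UI can degrade
  // gracefully (e.g., fall back to text-only chat if TTS failed to load).
  if (ok.contains(false)) {
    print('Some voice components failed to load: $ok');
  }
}
```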
### Handle Background/Foreground

```dart
class VoiceApp extends StatefulWidget {
  @override
  _VoiceAppState createState() => _VoiceAppState();
}

class _VoiceAppState extends State<VoiceApp> with WidgetsBindingObserver {
  VoiceSessionHandle? _session; // created when the user starts a session

  @override
  void initState() {
    super.initState();
    WidgetsBinding.instance.addObserver(this);
  }

  @override
  void didChangeAppLifecycleState(AppLifecycleState state) {
    if (state == AppLifecycleState.paused) {
      _session?.stop(); // Stop when the app goes to the background
    }
  }

  @override
  void dispose() {
    WidgetsBinding.instance.removeObserver(this);
    _session?.stop();
    super.dispose();
  }

  @override
  Widget build(BuildContext context) => const SizedBox(); // your voice UI
}
```
## Monitor Performance

Track performance metrics:
```dart
final result = await RunAnywhere.generate(prompt);

// Log performance
analytics.logEvent('generation', {
  'tokens': result.tokensUsed,
  'latency_ms': result.latencyMs,
  'tokens_per_second': result.tokensPerSecond,
  'model': result.modelUsed,
});
```
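If you don't have an analytics backend wired up, even a tiny in-app aggregate helps spot regressions across devices and models. A plain-Dart sketch; `PerfTracker` is a hypothetical helper that consumes the `tokensPerSecond` field shown above:

```dart
/// Tracks a running average of generation throughput.
class PerfTracker {
  int _samples = 0;
  double _totalTps = 0;

  void record(double tokensPerSecond) {
    _samples++;
    _totalTps += tokensPerSecond;
  }

  double get averageTps => _samples == 0 ? 0 : _totalTps / _samples;
}

// Usage:
//   perf.record(result.tokensPerSecond);
//   print('avg: ${perf.averageTps.toStringAsFixed(1)} tok/s');
```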
## Summary Checklist

- Start with the smallest model that meets your quality bar
- Unload models that are not in use and clean up old downloads
- Check available storage before downloading a model
- Stream responses in chat interfaces
- Preload models in the background, not when the user needs them
- Convert SDK errors into user-friendly messages
- Test performance on physical devices, not emulators
- Show download progress and confirm large downloads on cellular
- Load STT, LLM, and TTS components up front for voice features
- Stop voice sessions when the app goes to the background
- Track tokens/second and latency to catch regressions
## See Also