Documentation index: fetch the complete documentation index at https://docs.runanywhere.ai/llms.txt.
Use that file to discover all available pages before exploring further.
Want a complete working app? Clone our Flutter Starter Example to get started immediately with a full-featured demo app.
Complete Example
Here’s a complete example to get you started with on-device text generation:
import 'package:flutter/material.dart' ;
import 'package:runanywhere/runanywhere.dart' ;
import 'package:runanywhere_llamacpp/runanywhere_llamacpp.dart' ;
/// Entry point of the example app.
///
/// Sets up the Flutter binding, initializes the RunAnywhere SDK, registers
/// the LlamaCpp backend, registers one model with it, and finally starts
/// the widget tree with [MyApp].
void main() async {
  // The binding is set up first, ahead of the SDK calls below.
  WidgetsFlutterBinding.ensureInitialized();

  // SDK initialization followed by backend registration, in that order.
  await RunAnywhere.initialize();
  await LlamaCpp.register();

  // Metadata for the single model this example registers.
  const modelId = 'smollm2-360m-q8_0';
  const modelName = 'SmolLM2 360M Q8_0';
  const modelUrl =
      'https://huggingface.co/prithivMLmods/SmolLM2-360M-GGUF/resolve/main/SmolLM2-360M.Q8_0.gguf';
  const modelMemoryRequirement = 500000000;

  LlamaCpp.addModel(
    id: modelId,
    name: modelName,
    url: modelUrl,
    memoryRequirement: modelMemoryRequirement,
  );

  runApp(const MyApp());
}
/// Root widget of the example application.
///
/// Builds a [MaterialApp] whose home route is a [ChatScreen].
class MyApp extends StatelessWidget {
  /// Creates the root widget.
  const MyApp({super.key});

  @override
  Widget build(BuildContext context) => MaterialApp(home: ChatScreen());
}
/// Demo screen whose mutable state lives in [_ChatScreenState].
class ChatScreen extends StatefulWidget {
  /// Creates the screen.
  ///
  /// The constructor is `const` and forwards [key] to the superclass so the
  /// widget can be instantiated in const contexts and identified by key;
  /// existing `ChatScreen()` call sites keep working unchanged.
  const ChatScreen({super.key});

  // Returning the public `State<ChatScreen>` type avoids exposing the
  // library-private state class in a public signature.
  @override
  State<ChatScreen> createState() => _ChatScreenState();
}
/// State for [ChatScreen].
///
/// Tracks whether a model has been loaded, whether an operation is in
/// flight, and the most recent text to display (a generated response or an
/// error message).
class _ChatScreenState extends State<ChatScreen> {
  /// Id passed to both `RunAnywhere.downloadModel` and
  /// `RunAnywhere.loadModel`.
  static const _modelId = 'smollm2-360m-q8_0';

  /// Text shown in the scrollable area: generated output or `Error: ...`.
  String _response = '';

  /// Whether a download/load or a generation is currently running; while
  /// true the visible button is disabled.
  bool _isLoading = false;

  /// Whether [_loadModel] has completed successfully.
  bool _isModelLoaded = false;

  /// Downloads the model, logging progress, then loads it.
  ///
  /// Any thrown error is rendered into [_response]. State is only touched
  /// while the widget is still [mounted], because the awaits below can
  /// outlive the widget and `setState` after dispose throws.
  Future<void> _loadModel() async {
    setState(() => _isLoading = true);
    try {
      // Consume progress events until one reports completion.
      await for (final progress in RunAnywhere.downloadModel(_modelId)) {
        // `percentage` is scaled by 100 here — presumably a 0..1 fraction.
        print('Download: ${(progress.percentage * 100).toStringAsFixed(1)}%');
        if (progress.state.isCompleted) break;
      }

      await RunAnywhere.loadModel(_modelId);
      if (!mounted) return;
      setState(() => _isModelLoaded = true);
    } catch (e) {
      if (!mounted) return;
      setState(() => _response = 'Error: $e');
    } finally {
      // No `return` here: control flow in `finally` would mask errors.
      if (mounted) {
        setState(() => _isLoading = false);
      }
    }
  }

  /// Generates text for a fixed prompt and shows the result.
  ///
  /// On success [_response] becomes the generated text and the reported
  /// tokens-per-second is logged; on failure [_response] becomes the error
  /// message. State updates are skipped once the widget is unmounted.
  Future<void> _generateText() async {
    setState(() => _isLoading = true);
    try {
      final result = await RunAnywhere.generate(
        'Explain quantum computing in simple terms',
        options: LLMGenerationOptions(
          maxTokens: 200,
          temperature: 0.7,
        ),
      );
      if (!mounted) return;
      setState(() => _response = result.text);
      print('Speed: ${result.tokensPerSecond.toStringAsFixed(1)} tok/s');
    } catch (e) {
      if (!mounted) return;
      setState(() => _response = 'Error: $e');
    } finally {
      if (mounted) {
        setState(() => _isLoading = false);
      }
    }
  }

  @override
  Widget build(BuildContext context) {
    return Scaffold(
      appBar: AppBar(title: const Text('RunAnywhere Demo')),
      body: Padding(
        padding: const EdgeInsets.all(16),
        child: Column(
          children: [
            // Exactly one button is shown: download/load first, then
            // generate once the model is loaded. A null `onPressed`
            // disables the button while work is in flight.
            if (!_isModelLoaded)
              ElevatedButton(
                onPressed: _isLoading ? null : _loadModel,
                child: Text(_isLoading ? 'Loading...' : 'Download & Load Model'),
              )
            else
              ElevatedButton(
                onPressed: _isLoading ? null : _generateText,
                child: Text(_isLoading ? 'Generating...' : 'Generate'),
              ),
            const SizedBox(height: 20),
            Expanded(
              child: SingleChildScrollView(
                child: Text(_response),
              ),
            ),
          ],
        ),
      ),
    );
  }
}
Step-by-Step Breakdown
1. Initialize the SDK
// Call with no arguments for the default setup.
await RunAnywhere.initialize();
Initialize once at app startup. Development mode requires no API key.
2. Register Backend Modules
// LLM backend.
await LlamaCpp.register();
// STT/TTS backend (optional).
await Onnx.register();
Register the backends you need. Each backend is a separate package.
3. Register Models
// Registers a model under an id, with a display name, download URL, and
// memory requirement.
LlamaCpp.addModel(
  id: 'smollm2-360m-q8_0',
  name: 'SmolLM2 360M Q8_0',
  url: 'https://huggingface.co/.../SmolLM2-360M.Q8_0.gguf',
  memoryRequirement: 500000000,
);
Register models with their download URLs. Models are downloaded on-demand.
4. Download & Load Models
// Download with progress tracking: print each progress event as a
// percentage and stop listening once the download reports completion.
await for (final progress in RunAnywhere.downloadModel('smollm2-360m-q8_0')) {
  print('${(progress.percentage * 100).toStringAsFixed(1)}%');
  if (progress.state.isCompleted) break;
}

// Load the downloaded model into memory.
await RunAnywhere.loadModel('smollm2-360m-q8_0');
5. Generate Text
// Simple chat: send a single message and await the reply.
final response = await RunAnywhere.chat('Hello!');

// Full generation with metrics: the result exposes the generated text and
// the tokens-per-second figure printed below.
final result = await RunAnywhere.generate(
  'Your prompt here',
  options: LLMGenerationOptions(maxTokens: 200),
);
print('Response: ${result.text}');
print('Speed: ${result.tokensPerSecond} tok/s');
Environment Options
| Environment | Description | API Key Required |
| --- | --- | --- |
| `.development` | Verbose logging, local-only | No |
| `.staging` | Testing with real services | Yes |
| `.production` | Minimal logging, telemetry | Yes |
// Production initialization: supply an API key, the service base URL, and
// the production environment.
await RunAnywhere.initialize(
  apiKey: 'your-api-key',
  baseURL: 'https://api.runanywhere.ai',
  environment: SDKEnvironment.production,
);
Starter Example Repository
For a complete working application with all features demonstrated, clone our starter example:
git clone https://github.com/RunanywhereAI/flutter-starter-example.git
cd flutter-starter-example
flutter pub get
cd ios && pod install && cd ..
flutter run
The Flutter Starter Example includes:
AI Chat with streaming responses
Speech-to-Text transcription
Text-to-Speech synthesis
Voice Assistant pipeline
Model download and management UI
Next Steps
- LLM Chat: Simple text generation
- Streaming: Real-time token streaming
- Speech-to-Text: Audio transcription
- Text-to-Speech: Voice synthesis