Complete Example
Here’s a complete example to get you started with on-device text generation:
import React, { useState, useEffect } from 'react';
import { View, Text, Button, ActivityIndicator } from 'react-native';
import { RunAnywhere, SDKEnvironment, ModelCategory } from '@runanywhere/core';
import { LlamaCPP } from '@runanywhere/llamacpp';
export default function App() {
  const [isReady, setIsReady] = useState(false);
  const [response, setResponse] = useState('');
  const [loading, setLoading] = useState(false);

  useEffect(() => {
    async function setup() {
      // 1. Initialize SDK (fast, ~1-5ms)
      await RunAnywhere.initialize({
        environment: SDKEnvironment.Development,
      });

      // 2. Register LlamaCPP module
      LlamaCPP.register();

      // 3. Add a model
      await LlamaCPP.addModel({
        id: 'smollm2-360m',
        name: 'SmolLM2 360M',
        url: 'https://huggingface.co/prithivMLmods/SmolLM2-360M-GGUF/resolve/main/SmolLM2-360M.Q8_0.gguf',
        memoryRequirement: 500_000_000,
      });

      // 4. Download model (shows progress in console)
      await RunAnywhere.downloadModel('smollm2-360m', (progress) => {
        console.log(`Download: ${(progress.progress * 100).toFixed(1)}%`);
      });

      // 5. Load model into memory (loadModel takes a file path, not a model ID)
      const modelInfo = await RunAnywhere.getModelInfo('smollm2-360m');
      if (modelInfo?.localPath) {
        await RunAnywhere.loadModel(modelInfo.localPath);
      }
      setIsReady(true);
    }
    // Handle failures explicitly: a bare setup() call turns any init/download
    // error into an unhandled promise rejection and leaves the app stuck on
    // the "Setting up AI..." spinner forever.
    setup().catch((err) => {
      console.error('Setup failed:', err);
    });
  }, []);

  const generateResponse = async () => {
    setLoading(true);
    try {
      const result = await RunAnywhere.generate(
        'Explain quantum computing in simple terms',
        {
          maxTokens: 200,
          temperature: 0.7,
        }
      );
      setResponse(result.text);
      console.log(`Generated in ${result.latencyMs}ms at ${result.performanceMetrics.tokensPerSecond} tok/s`);
    } catch (err) {
      // Show the failure in the UI instead of letting it escape as an
      // unhandled rejection (try/finally alone does not catch).
      setResponse(err instanceof Error ? err.message : 'Generation failed');
    } finally {
      setLoading(false);
    }
  };

  // Block the UI behind a spinner until the model is downloaded and loaded.
  if (!isReady) {
    return (
      <View style={{ flex: 1, justifyContent: 'center', alignItems: 'center' }}>
        <ActivityIndicator size="large" />
        <Text>Setting up AI...</Text>
      </View>
    );
  }

  return (
    <View style={{ flex: 1, padding: 20 }}>
      <Button title="Generate" onPress={generateResponse} disabled={loading} />
      {loading && <ActivityIndicator />}
      <Text style={{ marginTop: 20 }}>{response}</Text>
    </View>
  );
}
Step-by-Step Guide
1. Initialize the SDK
Initialize RunAnywhere once when your app starts:
import { RunAnywhere, SDKEnvironment } from '@runanywhere/core'
// Development needs no API key; see the environment table below.
const config = {
  environment: SDKEnvironment.Development,
}
await RunAnywhere.initialize(config)
Environment Options
| Environment | Log Level | Description |
|---|---|---|
| Development | Debug | Full logging, local testing |
| Staging | Info | Staging backend, moderate logging |
| Production | Warning | Production backend, minimal logging |
2. Register Backend Modules
Register the backend modules you need:
import { LlamaCPP } from '@runanywhere/llamacpp'
import { ONNX } from '@runanywhere/onnx'
// Register LLM backend — do this before adding or loading GGUF models
LlamaCPP.register()
// Register STT/TTS backend (if needed) — used by the Whisper/Piper examples below
ONNX.register()
3. Add Models
Add models from HuggingFace or any direct URL:
// Add LLM model
await LlamaCPP.addModel({
id: 'smollm2-360m',
name: 'SmolLM2 360M',
url: 'https://huggingface.co/.../SmolLM2-360M.Q8_0.gguf',
memoryRequirement: 500_000_000,
})
// Add STT model
await ONNX.addModel({
id: 'whisper-tiny-en',
name: 'Whisper Tiny English',
url: 'https://github.com/.../sherpa-onnx-whisper-tiny.en.tar.gz',
modality: ModelCategory.SpeechRecognition,
artifactType: ModelArtifactType.TarGzArchive,
memoryRequirement: 75_000_000,
})
4. Download the Model
Download with progress tracking:
await RunAnywhere.downloadModel('smollm2-360m', (progress) => {
  switch (progress.state) {
    case 'downloading': {
      // Braces scope `percent` to this case; a lexical declaration directly
      // in a case clause leaks into the whole switch (no-case-declarations).
      const percent = (progress.progress * 100).toFixed(1)
      console.log(`Downloading: ${percent}%`)
      break
    }
    case 'extracting':
      console.log('Extracting archive...')
      break
    case 'completed':
      console.log('Download complete!')
      break
    case 'failed':
      console.error('Download failed')
      break
  }
})
5. Load and Generate
// Load the model into memory (loadModel takes a file path, not a model ID).
// getModelInfo can return nothing if the model was never downloaded, so
// guard localPath before using it — matching the complete example above.
const modelInfo = await RunAnywhere.getModelInfo('smollm2-360m')
if (!modelInfo?.localPath) {
  throw new Error('Model smollm2-360m is not downloaded — call downloadModel first')
}
await RunAnywhere.loadModel(modelInfo.localPath)

// Load STT model (requires path AND engine name)
const sttInfo = await RunAnywhere.getModelInfo('whisper-tiny-en')
if (sttInfo?.localPath) {
  await RunAnywhere.loadSTTModel(sttInfo.localPath, 'whisper')
}

// Load TTS model (requires path AND engine name)
const ttsInfo = await RunAnywhere.getModelInfo('piper-en-lessac')
if (ttsInfo?.localPath) {
  await RunAnywhere.loadTTSModel(ttsInfo.localPath, 'piper')
}

// Simple chat
const response = await RunAnywhere.chat('What is 2+2?')
console.log(response) // "4"

// Or with full metrics
const result = await RunAnywhere.generate('Write a haiku about coding', { maxTokens: 50 })
console.log('Response:', result.text)
console.log('Tokens/sec:', result.performanceMetrics.tokensPerSecond)
Unlike LLM loading which takes just a file path, STT and TTS model loading requires both the file
path AND the engine name ('whisper' for STT, 'piper' for TTS). Use getModelInfo() to
retrieve the downloaded model’s local path.
6. Stream Responses
For real-time token streaming:
const streamResult = await RunAnywhere.generateStream('Tell me a story about AI', {
  maxTokens: 200,
})

// Append each token to the running text and push it to the UI as it arrives
let accumulated = ''
for await (const token of streamResult.stream) {
  accumulated += token
  setResponse(accumulated) // Update UI state
}

// Get final metrics (LLMGenerationResult has tokensPerSecond at top level)
const finalResult = await streamResult.result
console.log('Speed:', finalResult.tokensPerSecond, 'tok/s')
Using in a React Component
Here’s a pattern for using the SDK in React components with hooks:
import { useState, useCallback } from 'react'
import { RunAnywhere, GenerationOptions, GenerationResult } from '@runanywhere/core'
/**
 * React hook wrapping the SDK's generate/chat calls with shared
 * busy-flag and error state for the component tree.
 */
export function useAI() {
  const [isGenerating, setIsGenerating] = useState(false)
  const [error, setError] = useState<string | null>(null)

  // Full generation with metrics; resolves to null on failure.
  const generate = useCallback(
    async (prompt: string, options?: GenerationOptions): Promise<GenerationResult | null> => {
      setIsGenerating(true)
      setError(null)
      try {
        const result = await RunAnywhere.generate(prompt, options)
        return result
      } catch (err) {
        const message = err instanceof Error ? err.message : 'Generation failed'
        setError(message)
        return null
      } finally {
        setIsGenerating(false)
      }
    },
    []
  )

  // Simple text-in/text-out chat; resolves to '' on failure.
  const chat = useCallback(async (prompt: string): Promise<string> => {
    setIsGenerating(true)
    setError(null)
    try {
      const reply = await RunAnywhere.chat(prompt)
      return reply
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Chat failed'
      setError(message)
      return ''
    } finally {
      setIsGenerating(false)
    }
  }, [])

  return { generate, chat, isGenerating, error }
}
What’s Next?