Complete Example
Here’s a complete example (App.tsx) to get you started with on-device text generation:
Copy
Ask AI
import React, { useState, useEffect } from 'react';
import { View, Text, Button, ActivityIndicator } from 'react-native';
import { RunAnywhere, SDKEnvironment, ModelCategory } from '@runanywhere/core';
import { LlamaCPP } from '@runanywhere/llamacpp';
/**
 * Minimal end-to-end example: initialize the SDK, register the LlamaCPP
 * backend, download + load a small GGUF model, then generate text on tap.
 *
 * Fixes over the naive version:
 * - setup() failures are caught and surfaced (previously an unhandled
 *   promise rejection left the user on the spinner forever).
 * - A cancellation flag prevents setState after unmount during the
 *   potentially long model download.
 * - generateResponse catches errors instead of only using finally.
 */
export default function App() {
  const [isReady, setIsReady] = useState(false);
  const [response, setResponse] = useState('');
  const [loading, setLoading] = useState(false);
  const [setupError, setSetupError] = useState<string | null>(null);

  useEffect(() => {
    // Set when the effect is cleaned up so late awaits don't touch state.
    let cancelled = false;

    async function setup() {
      // 1. Initialize SDK (fast, ~1-5ms)
      await RunAnywhere.initialize({
        environment: SDKEnvironment.Development,
      });

      // 2. Register LlamaCPP module
      LlamaCPP.register();

      // 3. Add a model
      await LlamaCPP.addModel({
        id: 'smollm2-360m',
        name: 'SmolLM2 360M',
        url: 'https://huggingface.co/prithivMLmods/SmolLM2-360M-GGUF/resolve/main/SmolLM2-360M.Q8_0.gguf',
        memoryRequirement: 500_000_000,
      });

      // 4. Download model (shows progress in console)
      await RunAnywhere.downloadModel('smollm2-360m', (progress) => {
        console.log(`Download: ${(progress.progress * 100).toFixed(1)}%`);
      });

      // 5. Load model into memory (localPath is only set once downloaded)
      const modelInfo = await RunAnywhere.getModelInfo('smollm2-360m');
      if (modelInfo?.localPath) {
        await RunAnywhere.loadModel(modelInfo.localPath);
      }

      if (!cancelled) {
        setIsReady(true);
      }
    }

    // Surface failures instead of leaving an unhandled promise rejection.
    setup().catch((err: unknown) => {
      if (!cancelled) {
        setSetupError(err instanceof Error ? err.message : 'Setup failed');
      }
    });

    return () => {
      cancelled = true;
    };
  }, []);

  const generateResponse = async () => {
    setLoading(true);
    try {
      const result = await RunAnywhere.generate(
        'Explain quantum computing in simple terms',
        {
          maxTokens: 200,
          temperature: 0.7,
        }
      );
      setResponse(result.text);
      console.log(`Generated in ${result.latencyMs}ms at ${result.performanceMetrics.tokensPerSecond} tok/s`);
    } catch (err) {
      // onPress handlers swallow rejections — show the failure in the UI.
      setResponse(err instanceof Error ? err.message : 'Generation failed');
    } finally {
      setLoading(false);
    }
  };

  if (setupError) {
    return (
      <View style={{ flex: 1, justifyContent: 'center', alignItems: 'center' }}>
        <Text>Setup failed: {setupError}</Text>
      </View>
    );
  }

  if (!isReady) {
    return (
      <View style={{ flex: 1, justifyContent: 'center', alignItems: 'center' }}>
        <ActivityIndicator size="large" />
        <Text>Setting up AI...</Text>
      </View>
    );
  }

  return (
    <View style={{ flex: 1, padding: 20 }}>
      <Button title="Generate" onPress={generateResponse} disabled={loading} />
      {loading && <ActivityIndicator />}
      <Text style={{ marginTop: 20 }}>{response}</Text>
    </View>
  );
}
Step-by-Step Guide
1. Initialize the SDK
Initialize RunAnywhere once when your app starts:
Ask AI
import { RunAnywhere, SDKEnvironment } from '@runanywhere/core'
// Call once at app startup, before any other SDK call (see App.tsx above).
await RunAnywhere.initialize({
environment: SDKEnvironment.Development, // No API key needed
})
Environment Options
| Environment | Log Level | Description |
|---|---|---|
| Development | Debug | Full logging, local testing |
| Staging | Info | Staging backend, moderate logging |
| Production | Warning | Production backend, minimal logging |
2. Register Backend Modules
Register the backend modules you need:
Ask AI
import { LlamaCPP } from '@runanywhere/llamacpp'
import { ONNX } from '@runanywhere/onnx'
// As in the App.tsx example above, register after RunAnywhere.initialize().
// Register LLM backend
LlamaCPP.register()
// Register STT/TTS backend (if needed)
ONNX.register()
3. Add Models
Add models from HuggingFace or any direct URL:
Ask AI
// Add LLM model
// Add LLM model (GGUF file served directly; no archive extraction needed)
await LlamaCPP.addModel({
id: 'smollm2-360m',
name: 'SmolLM2 360M',
url: 'https://huggingface.co/.../SmolLM2-360M.Q8_0.gguf',
memoryRequirement: 500_000_000,
})
// Add STT model
// NOTE: ModelCategory and ModelArtifactType must be imported from
// '@runanywhere/core' — the import snippet above only brings in the backends.
await ONNX.addModel({
id: 'whisper-tiny-en',
name: 'Whisper Tiny English',
url: 'https://github.com/.../sherpa-onnx-whisper-tiny.en.tar.gz',
modality: ModelCategory.SpeechRecognition,
// Tarball artifact: triggers the 'extracting' download state shown below.
artifactType: ModelArtifactType.TarGzArchive,
memoryRequirement: 75_000_000,
})
4. Download the Model
Download with progress tracking:
Ask AI
// Download with a progress callback; the promise resolves when the model
// is fully on disk (and extracted, for archive artifacts).
await RunAnywhere.downloadModel('smollm2-360m', (progress) => {
  switch (progress.state) {
    case 'downloading': {
      // Braces give the `const` its own block scope — a bare lexical
      // declaration inside a case clause leaks across the whole switch
      // (ESLint no-case-declarations).
      const percent = (progress.progress * 100).toFixed(1)
      console.log(`Downloading: ${percent}%`)
      break
    }
    case 'extracting':
      console.log('Extracting archive...')
      break
    case 'completed':
      console.log('Download complete!')
      break
    case 'failed':
      console.error('Download failed')
      break
  }
})
5. Load and Generate
Copy
Ask AI
// Load the model into memory
// Load the model into memory.
// getModelInfo can return undefined (unknown id) and localPath is only set
// after a successful download — guard before loading, as App.tsx does above.
const modelInfo = await RunAnywhere.getModelInfo('smollm2-360m')
if (!modelInfo?.localPath) {
  throw new Error('Model smollm2-360m has not been downloaded yet')
}
await RunAnywhere.loadModel(modelInfo.localPath)
// Simple chat
const response = await RunAnywhere.chat('What is 2+2?')
console.log(response) // "4"
// Or with full metrics
const result = await RunAnywhere.generate('Write a haiku about coding', { maxTokens: 50 })
console.log('Response:', result.text)
console.log('Tokens/sec:', result.performanceMetrics.tokensPerSecond)
6. Stream Responses
For real-time token streaming:
Ask AI
// Start a streaming generation; streamResult exposes both the live token
// stream and a promise for the final aggregated result.
const streamResult = await RunAnywhere.generateStream('Tell me a story about AI', {
maxTokens: 200,
})
// Display tokens as they arrive
for await (const token of streamResult.stream) {
// NOTE(review): process.stdout is Node-only — in a React Native app,
// append the token to component state instead.
process.stdout.write(token) // Or update UI
}
// Get final metrics
// Awaiting streamResult.result resolves only after the stream completes.
const finalResult = await streamResult.result
console.log('\nSpeed:', finalResult.performanceMetrics.tokensPerSecond, 'tok/s')
Using in a React Component
Here’s a pattern (useAI.ts) for using the SDK in React components with hooks:
Copy
Ask AI
import { useState, useCallback } from 'react'
import { RunAnywhere, GenerationOptions, GenerationResult } from '@runanywhere/core'
/**
 * React hook wrapping the RunAnywhere text APIs with shared UI state.
 *
 * Returns:
 * - generate: full generation with metrics; resolves null on failure.
 * - chat: simple prompt -> string; resolves '' on failure.
 * - isGenerating: true while either call is in flight.
 * - error: message from the most recent failure, or null.
 *
 * The original duplicated the entire loading/error/try-catch-finally
 * machinery in both callbacks; it now lives in one shared runner.
 */
export function useAI() {
  const [isGenerating, setIsGenerating] = useState(false)
  const [error, setError] = useState<string | null>(null)

  // Shared wrapper: toggles the in-flight flag, clears/records the error,
  // and maps a rejection to the caller-supplied fallback value.
  const run = useCallback(
    async <T,>(task: () => Promise<T>, fallback: T, failMsg: string): Promise<T> => {
      setIsGenerating(true)
      setError(null)
      try {
        return await task()
      } catch (err) {
        setError(err instanceof Error ? err.message : failMsg)
        return fallback
      } finally {
        setIsGenerating(false)
      }
    },
    []
  )

  const generate = useCallback(
    (prompt: string, options?: GenerationOptions): Promise<GenerationResult | null> =>
      run(() => RunAnywhere.generate(prompt, options), null, 'Generation failed'),
    [run]
  )

  const chat = useCallback(
    (prompt: string): Promise<string> =>
      run(() => RunAnywhere.chat(prompt), '', 'Chat failed'),
    [run]
  )

  return { generate, chat, isGenerating, error }
}