Skip to main content

Complete Example

Here’s a complete example to get you started with on-device text generation:
App.tsx
import React, { useState, useEffect } from 'react';
import { View, Text, Button, ActivityIndicator } from 'react-native';
import { RunAnywhere, SDKEnvironment, ModelCategory } from '@runanywhere/core';
import { LlamaCPP } from '@runanywhere/llamacpp';

export default function App() {
  const [isReady, setIsReady] = useState(false);
  const [response, setResponse] = useState('');
  const [loading, setLoading] = useState(false);

  useEffect(() => {
    async function setup() {
      // 1. Initialize SDK (fast, ~1-5ms)
      await RunAnywhere.initialize({
        environment: SDKEnvironment.Development,
      });

      // 2. Register LlamaCPP module
      LlamaCPP.register();

      // 3. Add a model
      await LlamaCPP.addModel({
        id: 'smollm2-360m',
        name: 'SmolLM2 360M',
        url: 'https://huggingface.co/prithivMLmods/SmolLM2-360M-GGUF/resolve/main/SmolLM2-360M.Q8_0.gguf',
        memoryRequirement: 500_000_000,
      });

      // 4. Download model (shows progress in console)
      await RunAnywhere.downloadModel('smollm2-360m', (progress) => {
        console.log(`Download: ${(progress.progress * 100).toFixed(1)}%`);
      });

      // 5. Load model into memory
      const modelInfo = await RunAnywhere.getModelInfo('smollm2-360m');
      if (modelInfo?.localPath) {
        await RunAnywhere.loadModel(modelInfo.localPath);
      }

      setIsReady(true);
    }

    setup();
  }, []);

  const generateResponse = async () => {
    setLoading(true);
    try {
      const result = await RunAnywhere.generate(
        'Explain quantum computing in simple terms',
        {
          maxTokens: 200,
          temperature: 0.7,
        }
      );
      setResponse(result.text);
      console.log(`Generated in ${result.latencyMs}ms at ${result.performanceMetrics.tokensPerSecond} tok/s`);
    } finally {
      setLoading(false);
    }
  };

  if (!isReady) {
    return (
      <View style={{ flex: 1, justifyContent: 'center', alignItems: 'center' }}>
        <ActivityIndicator size="large" />
        <Text>Setting up AI...</Text>
      </View>
    );
  }

  return (
    <View style={{ flex: 1, padding: 20 }}>
      <Button title="Generate" onPress={generateResponse} disabled={loading} />
      {loading && <ActivityIndicator />}
      <Text style={{ marginTop: 20 }}>{response}</Text>
    </View>
  );
}

Step-by-Step Guide

1. Initialize the SDK

Initialize RunAnywhere once when your app starts:
import { RunAnywhere, SDKEnvironment } from '@runanywhere/core'

// Call once when the app starts, before registering backends or adding
// models (see the step ordering in the complete example above).
await RunAnywhere.initialize({
  environment: SDKEnvironment.Development, // No API key needed
})

Environment Options

| Environment | Log Level | Description |
| ----------- | --------- | ----------- |
| Development | Debug     | Full logging, local testing |
| Staging     | Info      | Staging backend, moderate logging |
| Production  | Warning   | Production backend, minimal logging |

2. Register Backend Modules

Register the backend modules you need:
import { LlamaCPP } from '@runanywhere/llamacpp'
import { ONNX } from '@runanywhere/onnx'

// Register LLM backend — after RunAnywhere.initialize and before adding
// models (matches steps 1-3 in the complete example above)
LlamaCPP.register()

// Register STT/TTS backend (if needed)
ONNX.register()

3. Add Models

Add models from HuggingFace or any direct URL:
// Add LLM model
await LlamaCPP.addModel({
  id: 'smollm2-360m',
  name: 'SmolLM2 360M',
  url: 'https://huggingface.co/.../SmolLM2-360M.Q8_0.gguf',
  memoryRequirement: 500_000_000,
})

// Add STT model
// NOTE(review): ModelCategory (and presumably ModelArtifactType) come from
// '@runanywhere/core' — the import snippet above only brings in the backend
// modules, so add these to the import before using this example.
await ONNX.addModel({
  id: 'whisper-tiny-en',
  name: 'Whisper Tiny English',
  url: 'https://github.com/.../sherpa-onnx-whisper-tiny.en.tar.gz',
  modality: ModelCategory.SpeechRecognition,
  // Archive artifact — see the 'extracting' download state in the next step
  artifactType: ModelArtifactType.TarGzArchive,
  memoryRequirement: 75_000_000,
})

4. Download the Model

Download with progress tracking:
// Download with progress callbacks; the callback receives a state machine
// of 'downloading' → ('extracting' for archives) → 'completed' | 'failed'.
await RunAnywhere.downloadModel('smollm2-360m', (progress) => {
  switch (progress.state) {
    case 'downloading': {
      // Block braces scope the `const` to this case; a lexical declaration
      // directly inside a `case` is hoisted to the whole switch body and
      // flagged by ESLint's no-case-declarations rule.
      const percent = (progress.progress * 100).toFixed(1)
      console.log(`Downloading: ${percent}%`)
      break
    }
    case 'extracting':
      console.log('Extracting archive...')
      break
    case 'completed':
      console.log('Download complete!')
      break
    case 'failed':
      console.error('Download failed')
      break
  }
})

5. Load and Generate

// Load the model into memory (loadModel takes a file path, not a model ID).
// getModelInfo can return nothing if the model was never downloaded — guard
// before dereferencing localPath (the complete example above does the same).
const modelInfo = await RunAnywhere.getModelInfo('smollm2-360m')
if (!modelInfo?.localPath) throw new Error('Model not downloaded: smollm2-360m')
await RunAnywhere.loadModel(modelInfo.localPath)

// Load STT model (requires path AND engine name)
const sttInfo = await RunAnywhere.getModelInfo('whisper-tiny-en')
if (!sttInfo?.localPath) throw new Error('Model not downloaded: whisper-tiny-en')
await RunAnywhere.loadSTTModel(sttInfo.localPath, 'whisper')

// Load TTS model (requires path AND engine name)
const ttsInfo = await RunAnywhere.getModelInfo('piper-en-lessac')
if (!ttsInfo?.localPath) throw new Error('Model not downloaded: piper-en-lessac')
await RunAnywhere.loadTTSModel(ttsInfo.localPath, 'piper')

// Simple chat
const response = await RunAnywhere.chat('What is 2+2?')
console.log(response) // "4"

// Or with full metrics
const result = await RunAnywhere.generate('Write a haiku about coding', { maxTokens: 50 })
console.log('Response:', result.text)
console.log('Tokens/sec:', result.performanceMetrics.tokensPerSecond)
Unlike LLM loading which takes just a file path, STT and TTS model loading requires both the file path AND the engine name ('whisper' for STT, 'piper' for TTS). Use getModelInfo() to retrieve the downloaded model’s local path.

6. Stream Responses

For real-time token streaming:
// Start a streaming generation; tokens arrive via `streamResult.stream`
const streamResult = await RunAnywhere.generateStream('Tell me a story about AI', {
  maxTokens: 200,
})

// Accumulate and display tokens as they arrive
let fullResponse = ''
for await (const token of streamResult.stream) {
  fullResponse += token
  setResponse(fullResponse) // Update UI state — assumes a React state setter in scope
}

// Get final metrics (LLMGenerationResult has tokensPerSecond at top level)
const finalResult = await streamResult.result
console.log('Speed:', finalResult.tokensPerSecond, 'tok/s')

Using in a React Component

Here’s a pattern for using the SDK in React components with hooks:
useAI.ts
import { useState, useCallback } from 'react'
import { RunAnywhere, GenerationOptions, GenerationResult } from '@runanywhere/core'

/**
 * React hook wrapping the RunAnywhere text APIs with shared busy/error state.
 *
 * @returns `generate` — resolves to the full result, or null on failure;
 *          `chat` — resolves to the reply string, or '' on failure;
 *          `isGenerating` — true while either call is in flight;
 *          `error` — message from the last failure, cleared on each call.
 */
export function useAI() {
  const [isGenerating, setIsGenerating] = useState(false)
  const [error, setError] = useState<string | null>(null)

  const generate = useCallback(
    async (prompt: string, options?: GenerationOptions): Promise<GenerationResult | null> => {
      setIsGenerating(true)
      setError(null)

      // Single-exit shape: failures set the error state and leave null.
      let result: GenerationResult | null = null
      try {
        result = await RunAnywhere.generate(prompt, options)
      } catch (caught) {
        setError(caught instanceof Error ? caught.message : 'Generation failed')
      } finally {
        setIsGenerating(false)
      }
      return result
    },
    []
  )

  const chat = useCallback(async (prompt: string): Promise<string> => {
    setIsGenerating(true)
    setError(null)

    // Same pattern as `generate`, but callers get a plain string.
    let reply = ''
    try {
      reply = await RunAnywhere.chat(prompt)
    } catch (caught) {
      setError(caught instanceof Error ? caught.message : 'Chat failed')
    } finally {
      setIsGenerating(false)
    }
    return reply
  }, [])

  return { generate, chat, isGenerating, error }
}

What’s Next?