Complete Example

Here’s a complete example to get you started with on-device text generation:
App.tsx
import React, { useState, useEffect } from 'react';
import { View, Text, Button, ActivityIndicator } from 'react-native';
import { RunAnywhere, SDKEnvironment } from '@runanywhere/core';
import { LlamaCPP } from '@runanywhere/llamacpp';

export default function App() {
  const [isReady, setIsReady] = useState(false);
  const [response, setResponse] = useState('');
  const [loading, setLoading] = useState(false);

  useEffect(() => {
    async function setup() {
      // 1. Initialize SDK (fast, ~1-5ms)
      await RunAnywhere.initialize({
        environment: SDKEnvironment.Development,
      });

      // 2. Register LlamaCPP module
      LlamaCPP.register();

      // 3. Add a model
      await LlamaCPP.addModel({
        id: 'smollm2-360m',
        name: 'SmolLM2 360M',
        url: 'https://huggingface.co/prithivMLmods/SmolLM2-360M-GGUF/resolve/main/SmolLM2-360M.Q8_0.gguf',
        memoryRequirement: 500_000_000,
      });

      // 4. Download model (shows progress in console)
      await RunAnywhere.downloadModel('smollm2-360m', (progress) => {
        console.log(`Download: ${(progress.progress * 100).toFixed(1)}%`);
      });

      // 5. Load model into memory
      const modelInfo = await RunAnywhere.getModelInfo('smollm2-360m');
      if (modelInfo?.localPath) {
        await RunAnywhere.loadModel(modelInfo.localPath);
      }

      setIsReady(true);
    }

    setup();
  }, []);

  const generateResponse = async () => {
    setLoading(true);
    try {
      const result = await RunAnywhere.generate(
        'Explain quantum computing in simple terms',
        {
          maxTokens: 200,
          temperature: 0.7,
        }
      );
      setResponse(result.text);
      console.log(`Generated in ${result.latencyMs}ms at ${result.performanceMetrics.tokensPerSecond} tok/s`);
    } finally {
      setLoading(false);
    }
  };

  if (!isReady) {
    return (
      <View style={{ flex: 1, justifyContent: 'center', alignItems: 'center' }}>
        <ActivityIndicator size="large" />
        <Text>Setting up AI...</Text>
      </View>
    );
  }

  return (
    <View style={{ flex: 1, padding: 20 }}>
      <Button title="Generate" onPress={generateResponse} disabled={loading} />
      {loading && <ActivityIndicator />}
      <Text style={{ marginTop: 20 }}>{response}</Text>
    </View>
  );
}

Step-by-Step Guide

1. Initialize the SDK

Initialize RunAnywhere once when your app starts:
import { RunAnywhere, SDKEnvironment } from '@runanywhere/core'

await RunAnywhere.initialize({
  environment: SDKEnvironment.Development, // No API key needed
})

Environment Options

Environment | Log Level | Description
Development | Debug     | Full logging, local testing
Staging     | Info      | Staging backend, moderate logging
Production  | Warning   | Production backend, minimal logging
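
If you want the environment to follow your build type automatically, one option is to key off React Native's __DEV__ global, which is true in debug builds and false in release builds. A minimal sketch (the conditional itself is an illustration, not part of the SDK):
import { RunAnywhere, SDKEnvironment } from '@runanywhere/core'

// __DEV__ is true in React Native debug builds, false in release builds
const environment = __DEV__
  ? SDKEnvironment.Development
  : SDKEnvironment.Production

await RunAnywhere.initialize({ environment })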

2. Register Backend Modules

Register the backend modules you need:
import { LlamaCPP } from '@runanywhere/llamacpp'
import { ONNX } from '@runanywhere/onnx'

// Register LLM backend
LlamaCPP.register()

// Register STT/TTS backend (if needed)
ONNX.register()
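
A common pattern is to wrap initialization and registration in a single bootstrap function called once at app start. A minimal sketch (bootstrapAI is an illustrative name), following the ordering from the complete example above, where initialize runs before register:
import { RunAnywhere, SDKEnvironment } from '@runanywhere/core'
import { LlamaCPP } from '@runanywhere/llamacpp'

// One-time bootstrap: initialize the SDK, then register backends
export async function bootstrapAI(): Promise<void> {
  await RunAnywhere.initialize({
    environment: SDKEnvironment.Development,
  })
  LlamaCPP.register()
}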

3. Add Models

Add models from HuggingFace or any direct URL:
import { ModelCategory, ModelArtifactType } from '@runanywhere/core'

// Add LLM model
await LlamaCPP.addModel({
  id: 'smollm2-360m',
  name: 'SmolLM2 360M',
  url: 'https://huggingface.co/.../SmolLM2-360M.Q8_0.gguf',
  memoryRequirement: 500_000_000,
})

// Add STT model
await ONNX.addModel({
  id: 'whisper-tiny-en',
  name: 'Whisper Tiny English',
  url: 'https://github.com/.../sherpa-onnx-whisper-tiny.en.tar.gz',
  modality: ModelCategory.SpeechRecognition,
  artifactType: ModelArtifactType.TarGzArchive,
  memoryRequirement: 75_000_000,
})
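
If your app offers several models, you might keep their definitions in one list and add them in a loop. A sketch under that assumption (the LLM_MODELS array and its entries are illustrative; each entry uses the same fields as addModel above):
import { LlamaCPP } from '@runanywhere/llamacpp'

// Hypothetical catalog of LLM options, using the same fields as addModel above
const LLM_MODELS = [
  {
    id: 'smollm2-360m',
    name: 'SmolLM2 360M',
    url: 'https://huggingface.co/.../SmolLM2-360M.Q8_0.gguf',
    memoryRequirement: 500_000_000,
  },
  // ...additional models
]

for (const model of LLM_MODELS) {
  await LlamaCPP.addModel(model)
}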

4. Download the Model

Download with progress tracking:
await RunAnywhere.downloadModel('smollm2-360m', (progress) => {
  switch (progress.state) {
    case 'downloading': {
      const percent = (progress.progress * 100).toFixed(1)
      console.log(`Downloading: ${percent}%`)
      break
    }
    case 'extracting':
      console.log('Extracting archive...')
      break
    case 'completed':
      console.log('Download complete!')
      break
    case 'failed':
      console.error('Download failed')
      break
  }
})
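
In a React Native UI you will usually want this progress on screen rather than in the console. One way is a small hook that mirrors the callback into state. A sketch (useModelDownload is an illustrative name; only downloadModel and the progress fields come from the SDK):
import { useState, useCallback } from 'react'
import { RunAnywhere } from '@runanywhere/core'

export function useModelDownload() {
  const [percent, setPercent] = useState(0)
  const [state, setState] = useState('idle')

  const download = useCallback(async (modelId: string) => {
    await RunAnywhere.downloadModel(modelId, (progress) => {
      // Mirror the callback into state so the component re-renders
      setState(progress.state)
      if (progress.state === 'downloading') {
        setPercent(progress.progress * 100)
      }
    })
  }, [])

  return { download, percent, state }
}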

5. Load and Generate

// Load the model into memory
const modelInfo = await RunAnywhere.getModelInfo('smollm2-360m')
if (modelInfo?.localPath) {
  await RunAnywhere.loadModel(modelInfo.localPath)
}

// Simple chat
const response = await RunAnywhere.chat('What is 2+2?')
console.log(response) // "4"

// Or with full metrics
const result = await RunAnywhere.generate('Write a haiku about coding', { maxTokens: 50 })
console.log('Response:', result.text)
console.log('Tokens/sec:', result.performanceMetrics.tokensPerSecond)
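
Because a model may or may not already be on disk, it can help to fold steps 4 and 5 into one helper that downloads only when needed. A sketch built from the calls shown above (ensureModelLoaded is an illustrative name; the empty progress callback is just a placeholder):
import { RunAnywhere } from '@runanywhere/core'

// Download the model if it isn't on disk yet, then load it into memory
export async function ensureModelLoaded(modelId: string): Promise<void> {
  let info = await RunAnywhere.getModelInfo(modelId)

  if (!info?.localPath) {
    await RunAnywhere.downloadModel(modelId, () => {})
    info = await RunAnywhere.getModelInfo(modelId)
  }

  if (!info?.localPath) {
    throw new Error(`Model ${modelId} is not available locally`)
  }

  await RunAnywhere.loadModel(info.localPath)
}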

6. Stream Responses

For real-time token streaming:
const streamResult = await RunAnywhere.generateStream('Tell me a story about AI', {
  maxTokens: 200,
})

// Display tokens as they arrive
for await (const token of streamResult.stream) {
  process.stdout.write(token) // Or update UI
}

// Get final metrics
const finalResult = await streamResult.result
console.log('\nSpeed:', finalResult.performanceMetrics.tokensPerSecond, 'tok/s')
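
process.stdout.write works in Node-style environments; in a React Native screen you would append each token to component state instead. A sketch of that pattern (the component is illustrative; generateStream and its result shape come from the example above):
import React, { useState } from 'react'
import { View, Text, Button } from 'react-native'
import { RunAnywhere } from '@runanywhere/core'

export function StoryStreamer() {
  const [text, setText] = useState('')

  const streamStory = async () => {
    setText('')
    const streamResult = await RunAnywhere.generateStream('Tell me a story about AI', {
      maxTokens: 200,
    })
    // Append each token to the displayed text as it arrives
    for await (const token of streamResult.stream) {
      setText((prev) => prev + token)
    }
  }

  return (
    <View>
      <Button title="Stream" onPress={streamStory} />
      <Text>{text}</Text>
    </View>
  )
}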

Using in a React Component

Here’s a pattern for using the SDK in React components with hooks:
useAI.ts
import { useState, useCallback } from 'react'
import { RunAnywhere, GenerationOptions, GenerationResult } from '@runanywhere/core'

export function useAI() {
  const [isGenerating, setIsGenerating] = useState(false)
  const [error, setError] = useState<string | null>(null)

  const generate = useCallback(
    async (prompt: string, options?: GenerationOptions): Promise<GenerationResult | null> => {
      setIsGenerating(true)
      setError(null)

      try {
        return await RunAnywhere.generate(prompt, options)
      } catch (err) {
        setError(err instanceof Error ? err.message : 'Generation failed')
        return null
      } finally {
        setIsGenerating(false)
      }
    },
    []
  )

  const chat = useCallback(async (prompt: string): Promise<string> => {
    setIsGenerating(true)
    setError(null)

    try {
      return await RunAnywhere.chat(prompt)
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Chat failed')
      return ''
    } finally {
      setIsGenerating(false)
    }
  }, [])

  return { generate, chat, isGenerating, error }
}
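
Using the hook from a component then looks something like this (a minimal sketch; it assumes the model was already loaded during setup):
import React, { useState } from 'react'
import { View, Text, Button } from 'react-native'
import { useAI } from './useAI'

export function AskScreen() {
  const { chat, isGenerating, error } = useAI()
  const [answer, setAnswer] = useState('')

  const ask = async () => {
    setAnswer(await chat('What is 2+2?'))
  }

  return (
    <View style={{ padding: 20 }}>
      <Button title="Ask" onPress={ask} disabled={isGenerating} />
      {error && <Text>{error}</Text>}
      <Text>{answer}</Text>
    </View>
  )
}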

What’s Next?