Complete Example

Here’s a complete example to get you started with on-device text generation:
MainActivity.kt
import android.app.Application
import android.os.Bundle
import android.util.Log
import androidx.appcompat.app.AppCompatActivity
import androidx.lifecycle.lifecycleScope
import com.runanywhere.sdk.core.types.InferenceFramework
import com.runanywhere.sdk.public.RunAnywhere
import com.runanywhere.sdk.public.SDKEnvironment
import com.runanywhere.sdk.public.extensions.*
import kotlinx.coroutines.launch

// Declare this Application subclass in AndroidManifest.xml (android:name)
class MyApplication : Application() {
    override fun onCreate() {
        super.onCreate()

        // 1. Initialize SDK (fast, ~1-5ms)
        RunAnywhere.initialize(
            apiKey = "your-api-key",    // Optional for development
            environment = SDKEnvironment.DEVELOPMENT
        )
    }
}

// 2. Use in your Activity/Fragment (updateProgress/showResponse are your own UI helpers)
class MainActivity : AppCompatActivity() {
    private var modelId: String? = null

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)

        lifecycleScope.launch {
            // Register and download a model
            val modelInfo = RunAnywhere.registerModel(
                name = "Qwen 0.5B",
                url = "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q8_0.gguf",
                framework = InferenceFramework.LLAMA_CPP
            )
            modelId = modelInfo.id

            // Download with progress tracking
            RunAnywhere.downloadModel(modelInfo.id).collect { progress ->
                updateProgress((progress.progress * 100).toInt())
            }

            // Load and generate
            RunAnywhere.loadLLMModel(modelInfo.id)

            val result = RunAnywhere.generate(
                prompt = "Explain quantum computing in simple terms",
                options = LLMGenerationOptions(
                    maxTokens = 200,
                    temperature = 0.7f
                )
            )

            showResponse(result.text)
            Log.d("LLM", "Generated in ${result.latencyMs}ms at ${result.tokensPerSecond} tok/s")
        }
    }
}
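
The example above assumes the happy path. Downloads, model loading, and generation can all fail (network errors, insufficient storage or memory), so in practice wrap the calls in a try/catch. A minimal sketch; showError is a placeholder for your own UI code:

lifecycleScope.launch {
    try {
        modelId?.let { id ->
            RunAnywhere.loadLLMModel(id)
            val result = RunAnywhere.generate(
                prompt = "Explain quantum computing in simple terms",
                options = LLMGenerationOptions(maxTokens = 200)
            )
            showResponse(result.text)
        }
    } catch (e: Exception) {
        // Surface the failure to the user instead of crashing
        Log.e("LLM", "On-device generation failed", e)
        showError(e.message ?: "Generation failed")
    }
}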

Step-by-Step Guide

1. Initialize the SDK

Initialize RunAnywhere once in your Application.onCreate():
RunAnywhere.initialize(
    apiKey = "your-api-key",       // Optional for development mode
    baseURL = null,                // Uses default API endpoint
    environment = SDKEnvironment.DEVELOPMENT
)

Environment Options

Environment    Log Level   Description
DEVELOPMENT    Debug       Full logging, local testing
STAGING        Info        Staging backend, moderate logging
PRODUCTION     Warning     Production backend, minimal logging
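
For example, you can pick the environment from your build type. A minimal sketch, assuming the standard BuildConfig.DEBUG flag (swap in your own mechanism if you use build flavors):

fun initRunAnywhere() {
    // DEVELOPMENT gives full debug logging; PRODUCTION logs warnings only
    val env = if (BuildConfig.DEBUG) {
        SDKEnvironment.DEVELOPMENT
    } else {
        SDKEnvironment.PRODUCTION
    }
    RunAnywhere.initialize(
        apiKey = "your-api-key",   // Optional in DEVELOPMENT
        environment = env
    )
}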

2. Register a Model

Register models from HuggingFace or any direct URL:
val modelInfo = RunAnywhere.registerModel(
    name = "Qwen 0.5B",
    url = "https://huggingface.co/.../model.gguf",
    framework = InferenceFramework.LLAMA_CPP
)

3. Download the Model

Download with progress tracking using Kotlin Flows:
RunAnywhere.downloadModel(modelInfo.id).collect { progress ->
    when (progress.state) {
        DownloadState.DOWNLOADING -> {
            val percent = (progress.progress * 100).toInt()
            progressBar.progress = percent
        }
        DownloadState.COMPLETED -> {
            showMessage("Download complete!")
        }
        DownloadState.ERROR -> {
            showError(progress.error ?: "Download failed")
        }
    }
}
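
If downloadModel returns a cold Flow (which the collect-based API above suggests), the download only runs while it is being collected, so cancelling the collecting coroutine also cancels the download. A sketch; cancelButton is a hypothetical UI element:

// Keep a handle to the collecting coroutine
val downloadJob = lifecycleScope.launch {
    RunAnywhere.downloadModel(modelInfo.id).collect { progress ->
        progressBar.progress = (progress.progress * 100).toInt()
    }
}

// Later, e.g. from a Cancel button:
cancelButton.setOnClickListener { downloadJob.cancel() }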

4. Load and Generate

Once the model has finished downloading, load it into memory and generate text:
// Load the model into memory
RunAnywhere.loadLLMModel(modelInfo.id)

// Simple chat
val response = RunAnywhere.chat("What is 2+2?")

// Or with full metrics
val result = RunAnywhere.generate(
    prompt = "Write a haiku about coding",
    options = LLMGenerationOptions(maxTokens = 50)
)
println("Response: ${result.text}")
println("Tokens/sec: ${result.tokensPerSecond}")

5. Stream Responses

For real-time token streaming:
RunAnywhere.generateStream("Tell me a story about AI")
    .collect { token ->
        // Display tokens as they arrive
        textView.append(token)
    }
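
To build up the full response while streaming, and to handle mid-stream failures, you can accumulate tokens and attach a catch operator. A sketch, assuming generateStream returns a Flow<String> of token chunks; showError is your own UI helper:

import kotlinx.coroutines.flow.catch

val fullResponse = StringBuilder()
RunAnywhere.generateStream("Tell me a story about AI")
    .catch { e -> showError(e.message ?: "Streaming failed") }
    .collect { token ->
        fullResponse.append(token)
        textView.text = fullResponse.toString()   // Re-render the accumulated response
    }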

What’s Next?