Complete Example
Here’s a complete example to get you started with on-device text generation:
import android.os.Bundle
import android.util.Log
import androidx.appcompat.app.AppCompatActivity
import androidx.lifecycle.lifecycleScope
import com.runanywhere.sdk.core.onnx.ONNX
import com.runanywhere.sdk.foundation.bridge.extensions.CppBridgeModelPaths
import com.runanywhere.sdk.llm.llamacpp.LlamaCPP
import com.runanywhere.sdk.public.RunAnywhere
import com.runanywhere.sdk.public.SDKEnvironment
import com.runanywhere.sdk.storage.AndroidPlatformContext
import kotlinx.coroutines.flow.catch
import kotlinx.coroutines.launch

class MainActivity : AppCompatActivity() {

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)

        // 1. Initialize Android platform context (must run before RunAnywhere.initialize()).
        AndroidPlatformContext.initialize(this)

        // 2. Initialize the SDK.
        RunAnywhere.initialize(environment = SDKEnvironment.DEVELOPMENT)

        // 3. Tell the C++ bridge where model files are stored on disk.
        val runanywherePath = filesDir.resolve("runanywhere").also { it.mkdirs() }
        CppBridgeModelPaths.setBaseDirectory(runanywherePath.absolutePath)

        // 4. Register backends. VLM native registration may fail on some devices,
        //    but LLM text generation still works, so don't let it crash the app.
        try {
            LlamaCPP.register(priority = 100)
        } catch (e: Exception) {
            Log.w("SDK", "LlamaCPP VLM registration failed (LLM still works): ${e.message}")
        }
        ONNX.register(priority = 100)

        // 5. Register models (see "Register Models" below for what this does).
        ModelService.registerDefaultModels()

        // 6. Download, load, and generate.
        lifecycleScope.launch {
            RunAnywhere.downloadModel("smollm2-360m-instruct-q8_0")
                .catch { e -> showError("Download failed: ${e.message}") }
                .collect { progress ->
                    updateProgress((progress.progress * 100).toInt())
                }
            RunAnywhere.loadLLMModel("smollm2-360m-instruct-q8_0")
            val response = RunAnywhere.chat("What is 2+2?")
            showResponse(response)
        }
    }
}
Step-by-Step Guide
1. Initialize the SDK
Initialize RunAnywhere in your Activity.onCreate(). The order of initialization is critical:
// Step 1: Android platform context (must be first)
AndroidPlatformContext.initialize(this)
// Step 2: SDK initialization
RunAnywhere.initialize(environment = SDKEnvironment.DEVELOPMENT)
// Step 3: Set model storage base directory
val runanywherePath = filesDir.resolve("runanywhere").also { it.mkdirs() }
CppBridgeModelPaths.setBaseDirectory(runanywherePath.absolutePath)
AndroidPlatformContext.initialize(this) must be called before RunAnywhere.initialize().
CppBridgeModelPaths.setBaseDirectory() tells the C++ bridge where to find model files.
Environment Options
| Environment | Log Level | Description |
|---|---|---|
| DEVELOPMENT | Debug | Full logging, local testing |
| STAGING | Info | Staging backend, moderate logging |
| PRODUCTION | Warning | Production backend, minimal logging |
2. Register Backend Modules
Register backends after initialization but before registering models:
import com.runanywhere.sdk.llm.llamacpp.LlamaCPP
import com.runanywhere.sdk.core.onnx.ONNX
// Register LlamaCPP for LLM + VLM (wrap in try/catch — VLM may fail on some devices)
try {
LlamaCPP.register(priority = 100)
} catch (e: Exception) {
Log.w("SDK", "LlamaCPP VLM registration failed: ${e.message}")
}
// Register ONNX for STT/TTS
ONNX.register(priority = 100)
LlamaCPP.register() is wrapped in try/catch because VLM native registration may fail if the
.so doesn’t include nativeRegisterVlm. LLM text generation still works because it is
registered before VLM internally.
3. Register Models
Register models with their framework, modality, and download URL:
import com.runanywhere.sdk.core.types.InferenceFramework
import com.runanywhere.sdk.public.extensions.Models.ModelCategory
import com.runanywhere.sdk.public.extensions.Models.ModelFileDescriptor
import com.runanywhere.sdk.public.extensions.registerModel
import com.runanywhere.sdk.public.extensions.registerMultiFileModel
// LLM model
RunAnywhere.registerModel(
id = "smollm2-360m-instruct-q8_0",
name = "SmolLM2 360M Instruct Q8_0",
url = "https://huggingface.co/.../SmolLM2-360M.Q8_0.gguf",
framework = InferenceFramework.LLAMA_CPP,
modality = ModelCategory.LANGUAGE,
memoryRequirement = 400_000_000L
)
// STT model (tar.gz archive)
RunAnywhere.registerModel(
id = "sherpa-onnx-whisper-tiny.en",
name = "Sherpa Whisper Tiny (ONNX)",
url = "https://github.com/RunanywhereAI/sherpa-onnx/releases/.../sherpa-onnx-whisper-tiny.en.tar.gz",
framework = InferenceFramework.ONNX,
modality = ModelCategory.SPEECH_RECOGNITION
)
// VLM multi-file model
RunAnywhere.registerMultiFileModel(
id = "smolvlm-256m-instruct",
name = "SmolVLM 256M Instruct",
files = listOf(
ModelFileDescriptor(url = "https://...SmolVLM-Q8_0.gguf", filename = "SmolVLM-Q8_0.gguf"),
ModelFileDescriptor(url = "https://...mmproj-f16.gguf", filename = "mmproj-f16.gguf"),
),
framework = InferenceFramework.LLAMA_CPP,
modality = ModelCategory.MULTIMODAL,
memoryRequirement = 365_000_000L
)
4. Download and Load Models
Download with progress tracking using Kotlin Flows:
// Download
RunAnywhere.downloadModel("smollm2-360m-instruct-q8_0")
.catch { e -> showError("Download failed: ${e.message}") }
.collect { progress ->
updateProgress((progress.progress * 100).toInt())
}
// Load by modality
RunAnywhere.loadLLMModel("smollm2-360m-instruct-q8_0") // LLM
RunAnywhere.loadSTTModel("sherpa-onnx-whisper-tiny.en") // STT
RunAnywhere.loadTTSVoice("vits-piper-en_US-lessac-medium") // TTS
RunAnywhere.loadVLMModel("smolvlm-256m-instruct") // VLM
5. Generate Text
// Simple chat (returns String)
val response = RunAnywhere.chat("What is 2+2?")
// STT transcription (takes ByteArray of 16kHz mono PCM)
val text = RunAnywhere.transcribe(audioData)
// TTS synthesis
val ttsOutput = RunAnywhere.synthesize("Hello!", TTSOptions())
// ttsOutput.audioData contains WAV ByteArray
6. Stream Responses
For real-time token streaming:
RunAnywhere.generateStream("Tell me a story about AI")
.collect { token ->
textView.append(token)
}
What’s Next?