Skip to content

Misc. bug: Model not loaded on Android with NDK #13399

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
ferranpons opened this issue May 9, 2025 · 2 comments
Open

Misc. bug: Model not loaded on Android with NDK #13399

ferranpons opened this issue May 9, 2025 · 2 comments

Comments

@ferranpons
Copy link

ferranpons commented May 9, 2025

Name and Version

version: b5320
built with macOS Sonoma, Android Studio Meerkat 2024.3.1 Patch 2 and Android NDK 27.012077973

Operating systems

Other? (Please let us know in description), Mac

Which llama.cpp modules do you know to be affected?

libllama (core library)

Command line

Problem description & steps to reproduce

I'm trying to use llama.cpp on Android with local inference using NDK with JNI. When I try to load a model (nomic_embed_text_v1_5_q4_0.gguf) with the "llama_model_load_from_file" method, it does not load and returns null.

CMakeLists.txt

cmake_minimum_required(VERSION 3.22.1)
project(llama_jni)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")

# Path to llama.cpp folder is in the root folder of the project
set(LLAMA_CPP_DIR "${CMAKE_SOURCE_DIR}/../../../../llama.cpp")
set(LLAMA_CPP_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../../llama.cpp/src")
set(LLAMA_CPP_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../../llama.cpp/include")
set(LLAMA_GGML_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../../llama.cpp/ggml/src")
set(LLAMA_GGML_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../../llama.cpp/ggml/include")

add_definitions(
        -DANDROID_ABI=arm64-v8a
        -DANDROID_PLATFORM=android-28
        -DCMAKE_C_FLAGS="-march=armv8.7a"
        -DCMAKE_CXX_FLAGS="-march=armv8.7a"
        -DGGML_OPENMP=OFF
        -DGGML_LLAMAFILE=OFF
        -B build-android
        -DLLAMA_CURL=OFF
)

include_directories(
        ${CMAKE_SOURCE_DIR}
        ${LLAMA_CPP_DIR}
        ${LLAMA_CPP_INCLUDE_DIR}
        ${LLAMA_GGML_SRC_DIR}
        ${LLAMA_GGML_INCLUDE_DIR}
)

# Collect llama.cpp source files
file(GLOB LLAMA_SOURCES
        ${LLAMA_CPP_SRC_DIR}/*.cpp
        ${LLAMA_GGML_SRC_DIR}/*.cpp
        ${LLAMA_GGML_SRC_DIR}/*.c
        ${LLAMA_GGML_INCLUDE_DIR}
)

add_library(llama_jni SHARED
        llama_jni.cpp
        llama_embed.cpp
        ${LLAMA_SOURCES}
)

# Required Android libraries
find_library(log-lib log)

target_include_directories(llama_jni PRIVATE
        ${LLAMA_CPP_DIR}
        ${LLAMA_CPP_INCLUDE_DIR}
        ${LLAMA_GGML_SRC_DIR}
        ${LLAMA_GGML_INCLUDE_DIR}
        ${CMAKE_CURRENT_SOURCE_DIR}
        ${CMAKE_SOURCE_DIR}/../c_interop/include  # location of llama.h
        ${LLAMA_SOURCES}
)

target_link_libraries(
        llama_jni
        ${log-lib}
)

llama_embed.cpp

static struct llama_model *model = nullptr;
static struct llama_context *ctx = nullptr;
static int embedding_size = 0;

bool llama_embed_init(const char *model_path) {
    __android_log_print(ANDROID_LOG_INFO, "llama_jni", "Initializing llama...");
    llama_backend_init();

    __android_log_print(ANDROID_LOG_INFO, "llama_jni", "Model loading...");
    llama_model_params model_params = llama_model_default_params();

    __android_log_print(ANDROID_LOG_INFO, "llama_jni", "File Path: %s", model_path);
    if (std::filesystem::exists(model_path)) {
        __android_log_print(ANDROID_LOG_INFO, "llama_jni", "Model file exists: %s", model_path);
    }

    model = llama_model_load_from_file(model_path, model_params);
    if (!model) return false;
    __android_log_print(ANDROID_LOG_INFO, "llama_jni", "Model loaded successfully.");

    __android_log_print(ANDROID_LOG_INFO, "llama_jni", "Context creating...");
    llama_context_params ctx_params = llama_context_default_params();
    ctx_params.embeddings = true;
    ctx = llama_init_from_model(model, ctx_params);
    if (!ctx) return false;
    __android_log_print(ANDROID_LOG_INFO, "llama_jni", "Context created successfully.");

    embedding_size = llama_model_n_embd(model);

    return true;
}

First Bad Commit

No response

Relevant log output

No log raised in Logcat
@ferranpons
Copy link
Author

There's no result after applying the changes on this PR 13395.
Before applying that code, it crashed, raising this error:

 A  Fatal signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x78 in tid 32248 (hub.app.android), pid 32248 (hub.app.android)
Cmdline: com.dcshub.app.android
2025-05-09 11:01:23.750 32426-32426 DEBUG                   
pid-32426  A  pid: 32248, tid: 32248, name: hub.app.android  >>> com.dcshub.app.android <<<
#00 pc 000000000029ce7c  /data/app/~~f83A2nAr1S3n_npEawz13A==/com.dcshub.app.android-D1Yk2l7clmJpmjO1R6A--w==/base.apk!libllama_jni.so (offset 0xcab4000) (ggml_backend_dev_backend_reg+12) (BuildId: de628c8e9468c84e55a6c95f618773dd49c7c4ae)
#01 pc 0000000000424968  /data/app/~~f83A2nAr1S3n_npEawz13A==/com.dcshub.app.android-D1Yk2l7clmJpmjO1R6A--w==/base.apk!libllama_jni.so (offset 0xcab4000) (BuildId: de628c8e9468c84e55a6c95f618773dd49c7c4ae)
#02 pc 00000000003fbbb0  /data/app/~~f83A2nAr1S3n_npEawz13A==/com.dcshub.app.android-D1Yk2l7clmJpmjO1R6A--w==/base.apk!libllama_jni.so (offset 0xcab4000) (llama_model::load_tensors(llama_model_loader&)+224) (BuildId: de628c8e9468c84e55a6c95f618773dd49c7c4ae)
#03 pc 00000000004edd78  /data/app/~~f83A2nAr1S3n_npEawz13A==/com.dcshub.app.android-D1Yk2l7clmJpmjO1R6A--w==/base.apk!libllama_jni.so (offset 0xcab4000) (BuildId: de628c8e9468c84e55a6c95f618773dd49c7c4ae)
#04 pc 00000000004ecab0  /data/app/~~f83A2nAr1S3n_npEawz13A==/com.dcshub.app.android-D1Yk2l7clmJpmjO1R6A--w==/base.apk!libllama_jni.so (offset 0xcab4000) (BuildId: de628c8e9468c84e55a6c95f618773dd49c7c4ae)
#05 pc 00000000004ec54c  /data/app/~~f83A2nAr1S3n_npEawz13A==/com.dcshub.app.android-D1Yk2l7clmJpmjO1R6A--w==/base.apk!libllama_jni.so (offset 0xcab4000) (llama_model_load_from_file+116) (BuildId: de628c8e9468c84e55a6c95f618773dd49c7c4ae)
#06 pc 00000000002883b8  /data/app/~~f83A2nAr1S3n_npEawz13A==/com.dcshub.app.android-D1Yk2l7clmJpmjO1R6A--w==/base.apk!libllama_jni.so (offset 0xcab4000) (llama_embed_init+412) (BuildId: de628c8e9468c84e55a6c95f618773dd49c7c4ae)
#07 pc 0000000000288060  /data/app/~~f83A2nAr1S3n_npEawz13A==/com.dcshub.app.android-D1Yk2l7clmJpmjO1R6A--w==/base.apk!libllama_jni.so (offset 0xcab4000) (Java_com_dcshub_app_platform_LlamaBridge_initModel+48) (BuildId: de628c8e9468c84e55a6c95f618773dd49c7c4ae)

It seems related to not being able to load the Backend. I tried to set different flags and properties to force the CPU as the backend, but I had the same results. 😢

@slaren
Copy link
Member

slaren commented May 9, 2025

You should use the llama.cpp and ggml cmake scripts rather than trying to build your own, there are several details that you may get wrong and lead to issues like this.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants