llama.cpp 项目实现前端本地模型调用

fe929074 · 王一诺 · fe929074 · fe929074 · fe929074 · fe929074
Commit fe929074 authored Apr 09, 2025 by 王一诺
Showing with 2672 additions and 0 deletions
.gitignore
.idea/.gitignore
.idea/.name
.idea/compiler.xml
.idea/deploymentTargetSelector.xml
.idea/gradle.xml
.idea/inspectionProfiles/Project_Default.xml
.idea/kotlinc.xml
.idea/migrations.xml
.idea/misc.xml
app/.gitignore
app/build.gradle
app/proguard-rules.pro
app/src/androidTest/java/com/coolook/llama/ExampleInstrumentedTest.kt
app/src/main/AndroidManifest.xml
app/src/main/assets/models/distilgpt2_bin/config.json
app/src/main/assets/models/distilgpt2_bin/generation_config.json
app/src/main/assets/models/distilgpt2_bin/merges.txt
app/src/main/assets/models/distilgpt2_bin/model-zhen.gguf
app/src/main/assets/models/distilgpt2_bin/special_tokens_map.json
--- a/.gitignore
+++ b/.gitignore
+*.iml
+.gradle
+/local.properties
+/.idea/caches
+/.idea/libraries
+/.idea/modules.xml
+/.idea/workspace.xml
+/.idea/navEditor.xml
+/.idea/assetWizardSettings.xml
+.DS_Store
+/build
+/captures
+.externalNativeBuild
+.cxx
+local.properties
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
+# Default ignored files
+/shelf/
+/workspace.xml
--- a/.idea/.name
+++ b/.idea/.name
+llama-android
\ No newline at end of file
--- a/.idea/compiler.xml
+++ b/.idea/compiler.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="CompilerConfiguration">
+    <bytecodeTargetLevel target="17" />
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/deploymentTargetSelector.xml
+++ b/.idea/deploymentTargetSelector.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="deploymentTargetSelector">
+    <selectionStates>
+      <SelectionState runConfigName="app">
+        <option name="selectionMode" value="DROPDOWN" />
+      </SelectionState>
+    </selectionStates>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/gradle.xml
+++ b/.idea/gradle.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="GradleMigrationSettings" migrationVersion="1" />
+  <component name="GradleSettings">
+    <option name="linkedExternalProjectsSettings">
+      <GradleProjectSettings>
+        <option name="externalProjectPath" value="$PROJECT_DIR$" />
+        <option name="gradleJvm" value="#GRADLE_LOCAL_JAVA_HOME" />
+        <option name="modules">
+          <set>
+            <option value="$PROJECT_DIR$" />
+            <option value="$PROJECT_DIR$/app" />
+            <option value="$PROJECT_DIR$/llamalib" />
+          </set>
+        </option>
+        <option name="resolveExternalAnnotations" value="false" />
+      </GradleProjectSettings>
+    </option>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="ComposePreviewDimensionRespectsLimit" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+      <option name="previewFile" value="true" />
+    </inspection_tool>
+    <inspection_tool class="ComposePreviewMustBeTopLevelFunction" enabled="true" level="ERROR" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+      <option name="previewFile" value="true" />
+    </inspection_tool>
+    <inspection_tool class="ComposePreviewNeedsComposableAnnotation" enabled="true" level="ERROR" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+      <option name="previewFile" value="true" />
+    </inspection_tool>
+    <inspection_tool class="ComposePreviewNotSupportedInUnitTestFiles" enabled="true" level="ERROR" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+      <option name="previewFile" value="true" />
+    </inspection_tool>
+    <inspection_tool class="GlancePreviewDimensionRespectsLimit" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+    </inspection_tool>
+    <inspection_tool class="GlancePreviewMustBeTopLevelFunction" enabled="true" level="ERROR" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+    </inspection_tool>
+    <inspection_tool class="GlancePreviewNeedsComposableAnnotation" enabled="true" level="ERROR" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+    </inspection_tool>
+    <inspection_tool class="GlancePreviewNotSupportedInUnitTestFiles" enabled="true" level="ERROR" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+    </inspection_tool>
+    <inspection_tool class="PreviewAnnotationInFunctionWithParameters" enabled="true" level="ERROR" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+      <option name="previewFile" value="true" />
+    </inspection_tool>
+    <inspection_tool class="PreviewApiLevelMustBeValid" enabled="true" level="ERROR" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+      <option name="previewFile" value="true" />
+    </inspection_tool>
+    <inspection_tool class="PreviewFontScaleMustBeGreaterThanZero" enabled="true" level="ERROR" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+      <option name="previewFile" value="true" />
+    </inspection_tool>
+    <inspection_tool class="PreviewMultipleParameterProviders" enabled="true" level="ERROR" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+      <option name="previewFile" value="true" />
+    </inspection_tool>
+    <inspection_tool class="PreviewPickerAnnotation" enabled="true" level="ERROR" enabled_by_default="true">
+      <option name="composableFile" value="true" />
+      <option name="previewFile" value="true" />
+    </inspection_tool>
+  </profile>
+</component>
\ No newline at end of file
--- a/.idea/kotlinc.xml
+++ b/.idea/kotlinc.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="KotlinJpsPluginSettings">
+    <option name="version" value="1.9.0" />
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/migrations.xml
+++ b/.idea/migrations.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectMigrations">
+    <option name="MigrateToGradleLocalJavaHome">
+      <set>
+        <option value="$PROJECT_DIR$" />
+      </set>
+    </option>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
+<project version="4">
+  <component name="ExternalStorageConfigurationManager" enabled="true" />
+  <component name="ProjectRootManager" version="2" languageLevel="JDK_17" default="true" project-jdk-name="jbr-17" project-jdk-type="JavaSDK">
+    <output url="file://$PROJECT_DIR$/build/classes" />
+  </component>
+  <component name="ProjectType">
+    <option name="id" value="Android" />
+  </component>
+</project>
\ No newline at end of file
--- a/app/.gitignore
+++ b/app/.gitignore
+/build
\ No newline at end of file
--- a/app/build.gradle
+++ b/app/build.gradle
+// Gradle plugins for this module, resolved through the version catalog (`libs`).
+plugins {
+    alias(libs.plugins.android.application)
+    alias(libs.plugins.kotlin.android)
+}
+
+// Android application module configuration for the llama demo app.
+android {
+    namespace 'com.coolook.llama'
+    compileSdk 35
+
+    defaultConfig {
+        applicationId "com.coolook.llama"
+        minSdk 26
+        // NOTE(review): targetSdk (34) is lower than compileSdk (35) — confirm this is intentional.
+        targetSdk 34
+        versionCode 1
+        versionName "1.0"
+
+        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
+        vectorDrawables {
+            useSupportLibrary true
+        }
+    }
+
+    buildTypes {
+        release {
+            // Code shrinking/obfuscation is switched off for release builds.
+            minifyEnabled false
+            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
+        }
+    }
+    // NOTE(review): Java/Kotlin target 1.8 here while .idea/compiler.xml lists bytecode
+    // target 17 — confirm which level is intended.
+    compileOptions {
+        sourceCompatibility JavaVersion.VERSION_1_8
+        targetCompatibility JavaVersion.VERSION_1_8
+    }
+    kotlinOptions {
+        jvmTarget = '1.8'
+    }
+    // Both Compose and view binding are enabled; MainActivity uses the generated
+    // ActivityMainBinding class.
+    buildFeatures {
+        compose true
+        viewBinding true
+    }
+    composeOptions {
+        kotlinCompilerExtensionVersion '1.5.1'
+    }
+    packaging {
+        resources {
+            // Exclude license files from the packaged resources.
+            excludes += '/META-INF/{AL2.0,LGPL2.1}'
+        }
+    }
+}
+
+// Module dependencies. The local `llamalib` module provides the LLamaAndroid wrapper
+// used by MainActivity; everything else comes from the version catalog (`libs`).
+dependencies {
+    // Declared exactly once (the original listed this project dependency twice).
+    implementation project(':llamalib')
+    implementation libs.androidx.core.ktx
+    implementation libs.androidx.lifecycle.runtime.ktx
+    implementation libs.androidx.activity.compose
+    implementation platform(libs.androidx.compose.bom)
+    implementation libs.androidx.ui
+    implementation libs.androidx.ui.graphics
+    implementation libs.androidx.ui.tooling.preview
+    implementation libs.androidx.material3
+    implementation libs.material
+    implementation libs.androidx.appcompat
+    implementation libs.androidx.constraintlayout
+    implementation libs.androidx.navigation.fragment
+    implementation libs.androidx.navigation.ui
+    testImplementation libs.junit
+    androidTestImplementation libs.androidx.junit
+    androidTestImplementation libs.androidx.espresso.core
+    androidTestImplementation platform(libs.androidx.compose.bom)
+    androidTestImplementation libs.androidx.ui.test.junit4
+    debugImplementation libs.androidx.ui.tooling
+    debugImplementation libs.androidx.ui.test.manifest
+}
\ No newline at end of file
--- a/app/proguard-rules.pro
+++ b/app/proguard-rules.pro
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+#   http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+#   public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
\ No newline at end of file
--- a/app/src/androidTest/java/com/coolook/llama/ExampleInstrumentedTest.kt
+++ b/app/src/androidTest/java/com/coolook/llama/ExampleInstrumentedTest.kt
+package com.coolook.llama
+
+import androidx.test.platform.app.InstrumentationRegistry
+import androidx.test.ext.junit.runners.AndroidJUnit4
+
+import org.junit.Test
+import org.junit.runner.RunWith
+
+import org.junit.Assert.*
+
+/**
+ * On-device (instrumented) test that checks the package name reported by the
+ * target context of the app under test.
+ *
+ * See [testing documentation](http://d.android.com/tools/testing).
+ */
+@RunWith(AndroidJUnit4::class)
+class ExampleInstrumentedTest {
+    @Test
+    fun useAppContext() {
+        // Resolve the target context through the instrumentation, then compare its package.
+        val instrumentation = InstrumentationRegistry.getInstrumentation()
+        val actualPackage = instrumentation.targetContext.packageName
+        assertEquals("com.coolook.llama", actualPackage)
+    }
+}
\ No newline at end of file
--- a/app/src/main/AndroidManifest.xml
+++ b/app/src/main/AndroidManifest.xml
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+        xmlns:tools="http://schemas.android.com/tools">
+    <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
+
+    <application
+            android:largeHeap="true"
+            android:allowBackup="true"
+            android:dataExtractionRules="@xml/data_extraction_rules"
+            android:fullBackupContent="@xml/backup_rules"
+            android:icon="@mipmap/ic_launcher"
+            android:label="@string/app_name"
+            android:roundIcon="@mipmap/ic_launcher_round"
+            android:supportsRtl="true"
+            android:theme="@style/Theme.Llamaandroid"
+            tools:targetApi="31">
+        <activity
+                android:name=".MainActivity"
+                android:exported="true"
+                android:label="@string/app_name"
+                android:theme="@style/Theme.Llamaandroid">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity>
+    </application>
+
+</manifest>
\ No newline at end of file
--- a/app/src/main/assets/models/distilgpt2_bin/config.json
+++ b/app/src/main/assets/models/distilgpt2_bin/config.json
+{
+  "_num_labels": 1,
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 6,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.51.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}
--- a/app/src/main/assets/models/distilgpt2_bin/generation_config.json
+++ b/app/src/main/assets/models/distilgpt2_bin/generation_config.json
+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.51.1"
+}
--- a/app/src/main/assets/models/distilgpt2_bin/merges.txt
+++ b/app/src/main/assets/models/distilgpt2_bin/merges.txt
--- a/app/src/main/assets/models/distilgpt2_bin/model-zhen.gguf
+++ b/app/src/main/assets/models/distilgpt2_bin/model-zhen.gguf
--- a/app/src/main/assets/models/distilgpt2_bin/special_tokens_map.json
+++ b/app/src/main/assets/models/distilgpt2_bin/special_tokens_map.json
+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
--- a/app/src/main/assets/models/distilgpt2_bin/tokenizer.json
+++ b/app/src/main/assets/models/distilgpt2_bin/tokenizer.json
--- a/app/src/main/assets/models/distilgpt2_bin/tokenizer_config.json
+++ b/app/src/main/assets/models/distilgpt2_bin/tokenizer_config.json
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "model_max_length": 1024,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}
--- a/app/src/main/assets/models/distilgpt2_bin/vocab.json
+++ b/app/src/main/assets/models/distilgpt2_bin/vocab.json
--- a/app/src/main/java/com/coolook/llama/MainActivity.java
+++ b/app/src/main/java/com/coolook/llama/MainActivity.java
+package com.coolook.llama;
+
+import android.app.Activity;
+import android.content.res.AssetManager;
+import android.os.Bundle;
+
+import com.coolook.llamalib.LLamaAndroid;
+import com.google.android.material.snackbar.Snackbar;
+
+import androidx.appcompat.app.AppCompatActivity;
+
+import android.util.Log;
+import android.view.View;
+
+import androidx.navigation.NavController;
+import androidx.navigation.Navigation;
+import androidx.navigation.ui.AppBarConfiguration;
+import androidx.navigation.ui.NavigationUI;
+
+import com.coolook.llama.databinding.ActivityMainBinding;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+public class MainActivity extends Activity {
+
+    private AppBarConfiguration appBarConfiguration;
+    private ActivityMainBinding binding;
+
+    private LLamaAndroid llamaAndroid;
+    @Override
+    protected void onCreate(Bundle savedInstanceState) {
+        super.onCreate(savedInstanceState);
+
+        binding = ActivityMainBinding.inflate(getLayoutInflater());
+        setContentView(binding.getRoot());
+
+        llamaAndroid = LLamaAndroid.getInstance();
+        try {
+            loadModelFromAssets("model-zhen.gguf");
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+
+        llamaAndroid.send("Hello", false, new LLamaAndroid.CompletionCallback() {
+            @Override
+            public void onToken(String token) {
+                // 每个生成的 token 会回调这里，你可以逐步拼接生成的对话内容
+                Log.d("MainActivity", "Generated token: " + token);
+            }
+
+            @Override
+            public void onComplete() {
+                // 当对话生成完成时会回调这里
+                Log.d("MainActivity", "Dialogue generation completed.");
+            }
+        });
+    }
+
+    public void loadModelFromAssets(String modelFileName) throws Exception{
+        try {
+            // 获取 AssetManager
+            AssetManager assetManager = getAssets();
+            InputStream inputStream = assetManager.open("models/distilgpt2_bin/" + modelFileName);
+
+            // 读取模型文件
+            byte[] modelData = new byte[inputStream.available()];
+            inputStream.read(modelData);
+            inputStream.close();
+
+            // 将模型数据传递给 JNI
+            String modelPath = getFilesDir() + "/model.bin";
+            FileOutputStream fos = new FileOutputStream(modelPath);
+            fos.write(modelData);
+            fos.close();
+
+            // 加载模型
+            llamaAndroid.load(modelPath, new Runnable() {
+                @Override
+                public void run() {
+                    Log.d("MainActivity", "Model loaded successfully!");
+                }
+            }, new LLamaAndroid.ErrorHandler() {
+                @Override
+                public void onError(Exception e) {
+                    Log.e("MainActivity", "Error loading model: ", e);
+                }
+            });
+        } catch (IOException e) {
+            e.printStackTrace();
+            Log.e("MainActivity", "Error reading model file", e);
+        }
+    }
+}
\ No newline at end of file
--- a/app/src/main/res/drawable/ic_launcher_background.xml
+++ b/app/src/main/res/drawable/ic_launcher_background.xml
+<?xml version="1.0" encoding="utf-8"?>
+<vector xmlns:android="http://schemas.android.com/apk/res/android"
+        android:height="108dp"
+        android:width="108dp"
+        android:viewportHeight="108"
+        android:viewportWidth="108">
+    <path
+            android:fillColor="#3DDC84"
+            android:pathData="M0,0h108v108h-108z" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M9,0L9,108"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M19,0L19,108"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M29,0L29,108"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M39,0L39,108"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M49,0L49,108"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M59,0L59,108"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M69,0L69,108"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M79,0L79,108"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M89,0L89,108"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M99,0L99,108"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M0,9L108,9"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M0,19L108,19"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M0,29L108,29"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M0,39L108,39"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M0,49L108,49"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M0,59L108,59"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M0,69L108,69"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M0,79L108,79"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M0,89L108,89"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M0,99L108,99"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M19,29L89,29"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M19,39L89,39"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M19,49L89,49"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M19,59L89,59"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M19,69L89,69"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M19,79L89,79"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M29,19L29,89"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M39,19L39,89"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M49,19L49,89"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M59,19L59,89"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M69,19L69,89"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+    <path
+            android:fillColor="#00000000"
+            android:pathData="M79,19L79,89"
+            android:strokeColor="#33FFFFFF"
+            android:strokeWidth="0.8" />
+</vector>
--- a/app/src/main/res/drawable/ic_launcher_foreground.xml
+++ b/app/src/main/res/drawable/ic_launcher_foreground.xml
+<vector xmlns:android="http://schemas.android.com/apk/res/android"
+        xmlns:aapt="http://schemas.android.com/aapt"
+        android:width="108dp"
+        android:height="108dp"
+        android:viewportWidth="108"
+        android:viewportHeight="108">
+    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
+        <aapt:attr name="android:fillColor">
+            <gradient
+                    android:startY="49.59793"
+                    android:startX="42.9492"
+                    android:endY="92.4963"
+                    android:endX="85.84757"
+                    android:type="linear">
+                <item
+                        android:color="#44000000"
+                        android:offset="0.0" />
+                <item
+                        android:color="#00000000"
+                        android:offset="1.0" />
+            </gradient>
+        </aapt:attr>
+    </path>
+    <path
+            android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
+            android:fillColor="#FFFFFF"
+            android:fillType="nonZero"
+            android:strokeWidth="1"
+            android:strokeColor="#00000000" />
+</vector>
\ No newline at end of file
--- a/app/src/main/res/layout/activity_main.xml
+++ b/app/src/main/res/layout/activity_main.xml
+<?xml version="1.0" encoding="utf-8"?>
+<androidx.coordinatorlayout.widget.CoordinatorLayout xmlns:android="http://schemas.android.com/apk/res/android"
+        xmlns:app="http://schemas.android.com/apk/res-auto"
+        xmlns:tools="http://schemas.android.com/tools"
+        android:layout_width="match_parent"
+        android:layout_height="match_parent"
+        android:fitsSystemWindows="true"
+        tools:context=".MainActivity">
+
+    <com.google.android.material.appbar.AppBarLayout
+            android:layout_height="wrap_content"
+            android:layout_width="match_parent"
+            android:fitsSystemWindows="true">
+
+        <com.google.android.material.appbar.MaterialToolbar
+                android:id="@+id/toolbar"
+                android:layout_width="match_parent"
+                android:layout_height="?attr/actionBarSize" />
+
+    </com.google.android.material.appbar.AppBarLayout>
+
+
+    <com.google.android.material.floatingactionbutton.FloatingActionButton
+            android:id="@+id/fab"
+            android:layout_width="wrap_content"
+            android:layout_height="wrap_content"
+            android:layout_gravity="bottom|end"
+            android:layout_marginEnd="@dimen/fab_margin"
+            android:layout_marginBottom="16dp"
+            app:srcCompat="@android:drawable/ic_dialog_email" />
+
+</androidx.coordinatorlayout.widget.CoordinatorLayout>
\ No newline at end of file
--- a/app/src/main/res/mipmap-anydpi/ic_launcher.xml
+++ b/app/src/main/res/mipmap-anydpi/ic_launcher.xml
+<?xml version="1.0" encoding="utf-8"?>
+<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
+    <background android:drawable="@drawable/ic_launcher_background" />
+    <foreground android:drawable="@drawable/ic_launcher_foreground" />
+    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
+</adaptive-icon>
\ No newline at end of file
--- a/app/src/main/res/mipmap-anydpi/ic_launcher_round.xml
+++ b/app/src/main/res/mipmap-anydpi/ic_launcher_round.xml
+<?xml version="1.0" encoding="utf-8"?>
+<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
+    <background android:drawable="@drawable/ic_launcher_background" />
+    <foreground android:drawable="@drawable/ic_launcher_foreground" />
+    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
+</adaptive-icon>
\ No newline at end of file
--- a/app/src/main/res/mipmap-hdpi/ic_launcher.webp
+++ b/app/src/main/res/mipmap-hdpi/ic_launcher.webp
--- a/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp
+++ b/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp
--- a/app/src/main/res/mipmap-mdpi/ic_launcher.webp
+++ b/app/src/main/res/mipmap-mdpi/ic_launcher.webp
--- a/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp
+++ b/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp
--- a/app/src/main/res/mipmap-xhdpi/ic_launcher.webp
+++ b/app/src/main/res/mipmap-xhdpi/ic_launcher.webp
--- a/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp
+++ b/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp
--- a/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp
+++ b/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp
--- a/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp
+++ b/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp
--- a/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp
+++ b/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp
--- a/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp
+++ b/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp
--- a/app/src/main/res/navigation/nav_graph.xml
+++ b/app/src/main/res/navigation/nav_graph.xml
+<?xml version="1.0" encoding="utf-8"?>
+<navigation xmlns:android="http://schemas.android.com/apk/res/android"
+        xmlns:app="http://schemas.android.com/apk/res-auto"
+        xmlns:tools="http://schemas.android.com/tools"
+        android:id="@+id/nav_graph"
+        app:startDestination="@id/FirstFragment">
+
+    <fragment
+            android:id="@+id/FirstFragment"
+            android:name="com.coolook.llama.FirstFragment"
+            android:label="@string/first_fragment_label"
+            tools:layout="@layout/fragment_first">
+
+        <action
+                android:id="@+id/action_FirstFragment_to_SecondFragment"
+                app:destination="@id/SecondFragment" />
+    </fragment>
+    <fragment
+            android:id="@+id/SecondFragment"
+            android:name="com.coolook.llama.SecondFragment"
+            android:label="@string/second_fragment_label"
+            tools:layout="@layout/fragment_second">
+
+        <action
+                android:id="@+id/action_SecondFragment_to_FirstFragment"
+                app:destination="@id/FirstFragment" />
+    </fragment>
+</navigation>
\ No newline at end of file
--- a/app/src/main/res/values-land/dimens.xml
+++ b/app/src/main/res/values-land/dimens.xml
+<resources>
+    <dimen name="fab_margin">48dp</dimen>
+</resources>
\ No newline at end of file
--- a/app/src/main/res/values-night/themes.xml
+++ b/app/src/main/res/values-night/themes.xml
+<resources xmlns:tools="http://schemas.android.com/tools">
+    <!-- Base application theme. -->
+    <style name="Base.Theme.Llamaandroid" parent="Theme.Material3.DayNight.NoActionBar">
+        <!-- Customize your dark theme here. -->
+        <!-- <item name="colorPrimary">@color/my_dark_primary</item> -->
+    </style>
+</resources>
\ No newline at end of file
--- a/app/src/main/res/values-v23/themes.xml
+++ b/app/src/main/res/values-v23/themes.xml
+<resources xmlns:tools="http://schemas.android.com/tools">
+
+    <style name="Theme.Llamaandroid" parent="Base.Theme.Llamaandroid">
+        <!-- Transparent system bars for edge-to-edge. -->
+        <item name="android:navigationBarColor">@android:color/transparent</item>
+        <item name="android:statusBarColor">@android:color/transparent</item>
+        <item name="android:windowLightStatusBar">?attr/isLightTheme</item>
+    </style>
+</resources>
\ No newline at end of file
--- a/app/src/main/res/values-w1240dp/dimens.xml
+++ b/app/src/main/res/values-w1240dp/dimens.xml
+<resources>
+    <dimen name="fab_margin">200dp</dimen>
+</resources>
\ No newline at end of file
--- a/app/src/main/res/values-w600dp/dimens.xml
+++ b/app/src/main/res/values-w600dp/dimens.xml
+<resources>
+    <dimen name="fab_margin">48dp</dimen>
+</resources>
\ No newline at end of file
--- a/app/src/main/res/values/colors.xml
+++ b/app/src/main/res/values/colors.xml
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+    <color name="purple_200">#FFBB86FC</color>
+    <color name="purple_500">#FF6200EE</color>
+    <color name="purple_700">#FF3700B3</color>
+    <color name="teal_200">#FF03DAC5</color>
+    <color name="teal_700">#FF018786</color>
+    <color name="black">#FF000000</color>
+    <color name="white">#FFFFFFFF</color>
+</resources>
\ No newline at end of file
--- a/app/src/main/res/values/dimens.xml
+++ b/app/src/main/res/values/dimens.xml
+<resources>
+    <dimen name="fab_margin">16dp</dimen>
+</resources>
\ No newline at end of file
--- a/app/src/main/res/values/strings.xml
+++ b/app/src/main/res/values/strings.xml
+<resources>
+    <string name="app_name">llama-android</string>
+    <!-- Strings used for fragments for navigation -->
+    <string name="first_fragment_label">First Fragment</string>
+    <string name="second_fragment_label">Second Fragment</string>
+    <string name="next">Next</string>
+    <string name="previous">Previous</string>
+
+    <string name="lorem_ipsum">
+        Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam in scelerisque sem. Mauris
+        volutpat, dolor id interdum ullamcorper, risus dolor egestas lectus, sit amet mattis purus
+        dui nec risus. Maecenas non sodales nisi, vel dictum dolor. Class aptent taciti sociosqu ad
+        litora torquent per conubia nostra, per inceptos himenaeos. Suspendisse blandit eleifend
+        diam, vel rutrum tellus vulputate quis. Aliquam eget libero aliquet, imperdiet nisl a,
+        ornare ex. Sed rhoncus est ut libero porta lobortis. Fusce in dictum tellus.\n\n
+        Suspendisse interdum ornare ante. Aliquam nec cursus lorem. Morbi id magna felis. Vivamus
+        egestas, est a condimentum egestas, turpis nisl iaculis ipsum, in dictum tellus dolor sed
+        neque. Morbi tellus erat, dapibus ut sem a, iaculis tincidunt dui. Interdum et malesuada
+        fames ac ante ipsum primis in faucibus. Curabitur et eros porttitor, ultricies urna vitae,
+        molestie nibh. Phasellus at commodo eros, non aliquet metus. Sed maximus nisl nec dolor
+        bibendum, vel congue leo egestas.\n\n
+        Sed interdum tortor nibh, in sagittis risus mollis quis. Curabitur mi odio, condimentum sit
+        amet auctor at, mollis non turpis. Nullam pretium libero vestibulum, finibus orci vel,
+        molestie quam. Fusce blandit tincidunt nulla, quis sollicitudin libero facilisis et. Integer
+        interdum nunc ligula, et fermentum metus hendrerit id. Vestibulum lectus felis, dictum at
+        lacinia sit amet, tristique id quam. Cras eu consequat dui. Suspendisse sodales nunc ligula,
+        in lobortis sem porta sed. Integer id ultrices magna, in luctus elit. Sed a pellentesque
+        est.\n\n
+        Aenean nunc velit, lacinia sed dolor sed, ultrices viverra nulla. Etiam a venenatis nibh.
+        Morbi laoreet, tortor sed facilisis varius, nibh orci rhoncus nulla, id elementum leo dui
+        non lorem. Nam mollis ipsum quis auctor varius. Quisque elementum eu libero sed commodo. In
+        eros nisl, imperdiet vel imperdiet et, scelerisque a mauris. Pellentesque varius ex nunc,
+        quis imperdiet eros placerat ac. Duis finibus orci et est auctor tincidunt. Sed non viverra
+        ipsum. Nunc quis augue egestas, cursus lorem at, molestie sem. Morbi a consectetur ipsum, a
+        placerat diam. Etiam vulputate dignissim convallis. Integer faucibus mauris sit amet finibus
+        convallis.\n\n
+        Phasellus in aliquet mi. Pellentesque habitant morbi tristique senectus et netus et
+        malesuada fames ac turpis egestas. In volutpat arcu ut felis sagittis, in finibus massa
+        gravida. Pellentesque id tellus orci. Integer dictum, lorem sed efficitur ullamcorper,
+        libero justo consectetur ipsum, in mollis nisl ex sed nisl. Donec maximus ullamcorper
+        sodales. Praesent bibendum rhoncus tellus nec feugiat. In a ornare nulla. Donec rhoncus
+        libero vel nunc consequat, quis tincidunt nisl eleifend. Cras bibendum enim a justo luctus
+        vestibulum. Fusce dictum libero quis erat maximus, vitae volutpat diam dignissim.
+    </string>
+</resources>
\ No newline at end of file
--- a/app/src/main/res/values/themes.xml
+++ b/app/src/main/res/values/themes.xml
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+
+    <!-- Default definition of the application theme; the values-v23 variant overrides it with
+         edge-to-edge system bar settings. Both variants inherit from Base.Theme.Llamaandroid
+         so the app resolves to the same Material3 theme family regardless of qualifier. -->
+    <style name="Theme.Llamaandroid" parent="Base.Theme.Llamaandroid" />
+    <!-- Base application theme. -->
+    <style name="Base.Theme.Llamaandroid" parent="Theme.Material3.DayNight.NoActionBar">
+        <!-- Customize your light theme here. -->
+        <!-- <item name="colorPrimary">@color/my_light_primary</item> -->
+    </style>
+</resources>
\ No newline at end of file
--- a/app/src/main/res/xml/backup_rules.xml
+++ b/app/src/main/res/xml/backup_rules.xml
+<?xml version="1.0" encoding="utf-8"?><!--
+   Sample backup rules file; uncomment and customize as necessary.
+   See https://developer.android.com/guide/topics/data/autobackup
+   for details.
+   Note: This file is ignored for devices older than API 31
+   See https://developer.android.com/about/versions/12/backup-restore
+-->
+<full-backup-content>
+    <!--
+   <include domain="sharedpref" path="."/>
+   <exclude domain="sharedpref" path="device.xml"/>
+-->
+</full-backup-content>
\ No newline at end of file
--- a/app/src/main/res/xml/data_extraction_rules.xml
+++ b/app/src/main/res/xml/data_extraction_rules.xml
+<?xml version="1.0" encoding="utf-8"?><!--
+   Sample data extraction rules file; uncomment and customize as necessary.
+   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
+   for details.
+-->
+<data-extraction-rules>
+    <cloud-backup>
+        <!-- TODO: Use <include> and <exclude> to control what is backed up.
+        <include .../>
+        <exclude .../>
+        -->
+    </cloud-backup>
+    <!--
+    <device-transfer>
+        <include .../>
+        <exclude .../>
+    </device-transfer>
+    -->
+</data-extraction-rules>
\ No newline at end of file
--- a/app/src/test/java/com/coolook/llama/ExampleUnitTest.kt
+++ b/app/src/test/java/com/coolook/llama/ExampleUnitTest.kt
+package com.coolook.llama
+
+import org.junit.Test
+
+import org.junit.Assert.*
+
+/**
+ * Example local unit test, which will execute on the development machine (host).
+ *
+ * See [testing documentation](http://d.android.com/tools/testing).
+ */
+class ExampleUnitTest {
+    @Test
+    fun addition_isCorrect() {
+        assertEquals(4, 2 + 2)
+    }
+}
\ No newline at end of file
--- a/build.gradle
+++ b/build.gradle
+// Root build script: configuration placed here is shared by every sub-project/module.
+// The plugins are only declared (apply false); each module applies the ones it needs.
+plugins {
+    alias(libs.plugins.android.application) apply false
+    alias(libs.plugins.kotlin.android) apply false
+    alias(libs.plugins.android.library) apply false
+}
\ No newline at end of file
--- a/gradle.properties
+++ b/gradle.properties
+# Project-wide Gradle settings.
+# IDE (e.g. Android Studio) users:
+# Gradle settings configured through the IDE *will override*
+# any settings specified in this file.
+# For more details on how to configure your build environment visit
+# http://www.gradle.org/docs/current/userguide/build_environment.html
+# Specifies the JVM arguments used for the daemon process.
+# The setting is particularly useful for tweaking memory settings.
+org.gradle.jvmargs=-Xmx4g -Dfile.encoding=UTF-8
+# When configured, Gradle will run in incubating parallel mode.
+# This option should only be used with decoupled projects. For more details, visit
+# https://developer.android.com/r/tools/gradle-multi-project-decoupled-projects
+# org.gradle.parallel=true
+# AndroidX package structure to make it clearer which packages are bundled with the
+# Android operating system, and which are packaged with your app's APK
+# https://developer.android.com/topic/libraries/support-library/androidx-rn
+android.useAndroidX=true
+# Kotlin code style for this project: "official" or "obsolete":
+kotlin.code.style=official
+# Enables namespacing of each library's R class so that its R class includes only the
+# resources declared in the library itself and none from the library's dependencies,
+# thereby reducing the size of the R class for that library
+android.nonTransitiveRClass=true
\ No newline at end of file
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
+[versions]
+agp = "8.6.0"
+kotlin = "1.9.0"
+coreKtx = "1.15.0"
+junit = "4.13.2"
+junitVersion = "1.2.1"
+espressoCore = "3.6.1"
+lifecycleRuntimeKtx = "2.8.7"
+activityCompose = "1.10.1"
+composeBom = "2024.04.01"
+material = "1.12.0"
+appcompat = "1.7.0"
+constraintlayout = "2.2.1"
+navigationFragment = "2.6.0"
+navigationUi = "2.6.0"
+
+[libraries]
+androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" }
+junit = { group = "junit", name = "junit", version.ref = "junit" }
+androidx-junit = { group = "androidx.test.ext", name = "junit", version.ref = "junitVersion" }
+androidx-espresso-core = { group = "androidx.test.espresso", name = "espresso-core", version.ref = "espressoCore" }
+androidx-lifecycle-runtime-ktx = { group = "androidx.lifecycle", name = "lifecycle-runtime-ktx", version.ref = "lifecycleRuntimeKtx" }
+androidx-activity-compose = { group = "androidx.activity", name = "activity-compose", version.ref = "activityCompose" }
+androidx-compose-bom = { group = "androidx.compose", name = "compose-bom", version.ref = "composeBom" }
+androidx-ui = { group = "androidx.compose.ui", name = "ui" }
+androidx-ui-graphics = { group = "androidx.compose.ui", name = "ui-graphics" }
+androidx-ui-tooling = { group = "androidx.compose.ui", name = "ui-tooling" }
+androidx-ui-tooling-preview = { group = "androidx.compose.ui", name = "ui-tooling-preview" }
+androidx-ui-test-manifest = { group = "androidx.compose.ui", name = "ui-test-manifest" }
+androidx-ui-test-junit4 = { group = "androidx.compose.ui", name = "ui-test-junit4" }
+androidx-material3 = { group = "androidx.compose.material3", name = "material3" }
+material = { group = "com.google.android.material", name = "material", version.ref = "material" }
+androidx-appcompat = { group = "androidx.appcompat", name = "appcompat", version.ref = "appcompat" }
+androidx-constraintlayout = { group = "androidx.constraintlayout", name = "constraintlayout", version.ref = "constraintlayout" }
+androidx-navigation-fragment = { group = "androidx.navigation", name = "navigation-fragment", version.ref = "navigationFragment" }
+androidx-navigation-ui = { group = "androidx.navigation", name = "navigation-ui", version.ref = "navigationUi" }
+
+[plugins]
+android-application = { id = "com.android.application", version.ref = "agp" }
+kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }
+android-library = { id = "com.android.library", version.ref = "agp" }
+
--- a/gradle/wrapper/gradle-wrapper.jar
+++ b/gradle/wrapper/gradle-wrapper.jar
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
+#Wed Apr 09 11:52:39 CST 2025
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
--- a/gradlew
+++ b/gradlew
+#!/usr/bin/env sh
+
+#
+# Copyright 2015 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+##
+##  Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+    ls=`ls -ld "$PRG"`
+    link=`expr "$ls" : '.*-> \(.*\)$'`
+    if expr "$link" : '/.*' > /dev/null; then
+        PRG="$link"
+    else
+        PRG=`dirname "$PRG"`"/$link"
+    fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+# Print all arguments, joined into a single line, on standard output.
+warn () {
+    echo "$*"
+}
+
+# Print the message framed by blank lines, then terminate the script with exit status 1.
+die () {
+    echo
+    echo "$*"
+    echo
+    exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+  CYGWIN* )
+    cygwin=true
+    ;;
+  Darwin* )
+    darwin=true
+    ;;
+  MINGW* )
+    msys=true
+    ;;
+  NONSTOP* )
+    nonstop=true
+    ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+        # IBM's JDK on AIX uses strange locations for the executables
+        JAVACMD="$JAVA_HOME/jre/sh/java"
+    else
+        JAVACMD="$JAVA_HOME/bin/java"
+    fi
+    if [ ! -x "$JAVACMD" ] ; then
+        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+else
+    JAVACMD="java"
+    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+    MAX_FD_LIMIT=`ulimit -H -n`
+    if [ $? -eq 0 ] ; then
+        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+            MAX_FD="$MAX_FD_LIMIT"
+        fi
+        ulimit -n $MAX_FD
+        if [ $? -ne 0 ] ; then
+            warn "Could not set maximum file descriptor limit: $MAX_FD"
+        fi
+    else
+        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+    fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
+    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+
+    JAVACMD=`cygpath --unix "$JAVACMD"`
+
+    # We build the pattern for arguments to be converted via cygpath
+    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+    SEP=""
+    for dir in $ROOTDIRSRAW ; do
+        ROOTDIRS="$ROOTDIRS$SEP$dir"
+        SEP="|"
+    done
+    OURCYGPATTERN="(^($ROOTDIRS))"
+    # Add a user-defined pattern to the cygpath arguments
+    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+    fi
+    # Now convert the arguments - kludge to limit ourselves to /bin/sh
+    i=0
+    for arg in "$@" ; do
+        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option
+
+        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
+            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+        else
+            eval `echo args$i`="\"$arg\""
+        fi
+        i=`expr $i + 1`
+    done
+    case $i in
+        0) set -- ;;
+        1) set -- "$args0" ;;
+        2) set -- "$args0" "$args1" ;;
+        3) set -- "$args0" "$args1" "$args2" ;;
+        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+    esac
+fi
+
+# Escape application args.
+# For each argument, sed rewrites every embedded single quote as '\'' , prefixes the first
+# line with a single quote and appends "' \" to the last line; a final " " is echoed after
+# all arguments. The captured output is stored in APP_ARGS and re-parsed by `eval set --`.
+save () {
+    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+    echo " "
+}
+APP_ARGS=`save "$@"`
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+exec "$JAVACMD" "$@"
--- a/gradlew.bat
+++ b/gradlew.bat
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem      https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem  Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
--- a/llamalib/.gitignore
+++ b/llamalib/.gitignore
+/build
\ No newline at end of file
--- a/llamalib/build.gradle
+++ b/llamalib/build.gradle
+plugins {
+    alias(libs.plugins.android.library)
+}
+
+// Android library configuration for the llamalib module, including its CMake-based native build.
+android {
+    namespace 'com.coolook.llamalib'
+    compileSdk 34
+
+    defaultConfig {
+        minSdk 26
+
+        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
+        consumerProguardFiles "consumer-rules.pro"
+
+        externalNativeBuild {
+            cmake {
+                // CMake arguments: libcurl support off, llama.cpp "common" utils library on,
+                // GGML_LLAMAFILE off, and a Release build type for the native code.
+                arguments "-DLLAMA_CURL=OFF",
+                        "-DLLAMA_BUILD_COMMON=ON",
+                        "-DGGML_LLAMAFILE=OFF",
+                        "-DCMAKE_BUILD_TYPE=Release"
+
+                // C++ compiler flags
+                cppFlags "-std=c++17"
+
+                // If empty arguments need to be passed instead:
+                // arguments ""
+                // cppFlags ""
+            }
+        }
+    }
+
+    buildTypes {
+        release {
+            minifyEnabled false
+            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
+        }
+    }
+    compileOptions {
+        sourceCompatibility JavaVersion.VERSION_1_8
+        targetCompatibility JavaVersion.VERSION_1_8
+    }
+
+    // Location of the CMake script that drives the native build, and the CMake version to use.
+    externalNativeBuild {
+        cmake {
+            path "src/main/cpp/CMakeLists.txt"
+            version "3.22.1"
+        }
+    }
+}
+
+dependencies {
+
+    implementation libs.androidx.appcompat
+    implementation libs.material
+    testImplementation libs.junit
+    androidTestImplementation libs.androidx.junit
+    androidTestImplementation libs.androidx.espresso.core
+}
\ No newline at end of file
--- a/llamalib/consumer-rules.pro
+++ b/llamalib/consumer-rules.pro
--- a/llamalib/proguard-rules.pro
+++ b/llamalib/proguard-rules.pro
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+#   http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+#   public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
\ No newline at end of file
--- a/llamalib/src/androidTest/java/com/coolook/llamalib/ExampleInstrumentedTest.java
+++ b/llamalib/src/androidTest/java/com/coolook/llamalib/ExampleInstrumentedTest.java
+package com.coolook.llamalib;
+
+import android.content.Context;
+
+import androidx.test.platform.app.InstrumentationRegistry;
+import androidx.test.ext.junit.runners.AndroidJUnit4;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+import static org.junit.Assert.*;
+
+/**
+ * Instrumented test, which will execute on an Android device.
+ *
+ * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
+ */
+@RunWith(AndroidJUnit4.class)
+public class ExampleInstrumentedTest {
+    @Test
+    public void useAppContext() {
+        // Context of the app under test.
+        Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext();
+        assertEquals("com.coolook.llamalib.test", appContext.getPackageName());
+    }
+}
\ No newline at end of file
--- a/llamalib/src/main/AndroidManifest.xml
+++ b/llamalib/src/main/AndroidManifest.xml
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android">
+
+</manifest>
\ No newline at end of file
--- a/llamalib/src/main/cpp/CMakeLists.txt
+++ b/llamalib/src/main/cpp/CMakeLists.txt
+# For more information about using CMake with Android Studio, read the
+# documentation: https://d.android.com/studio/projects/add-native-code.html.
+# For more examples on how to use CMake, see https://github.com/android/ndk-samples.
+
+# Sets the minimum CMake version required for this project.
+cmake_minimum_required(VERSION 3.22.1)
+
+# Declares the project name. The project name can be accessed via ${PROJECT_NAME}.
+# Since this is the top level CMakeLists.txt, the project name is also accessible
+# with ${CMAKE_PROJECT_NAME} (both CMake variables are in-sync within the top level
+# build script scope).
+project("llama-android")
+
+#include(FetchContent)
+#FetchContent_Declare(
+#        llama
+#        GIT_REPOSITORY https://github.com/ggml-org/llama.cpp
+#        GIT_TAG        master
+#)
+
+# Also provides "common"
+#FetchContent_MakeAvailable(llama)
+
+# Creates and names a library, sets it as either STATIC
+# or SHARED, and provides the relative paths to its source code.
+# You can define multiple libraries, and CMake builds them for you.
+# Gradle automatically packages shared libraries with your APK.
+#
+# In this top level CMakeLists.txt, ${CMAKE_PROJECT_NAME} is used to define
+# the target library name; in the sub-module's CMakeLists.txt, ${PROJECT_NAME}
+# is preferred for the same purpose.
+#
+
+# Build the local copy of llama.cpp found in ./llama-cpp, using build-llama as its
+# binary (output) directory.
+add_subdirectory(llama-cpp build-llama)
+
+# In order to load a library into your app from Java/Kotlin, you must call
+# System.loadLibrary() and pass the name of the library defined here;
+# for GameActivity/NativeActivity derived applications, the same library name must be
+# used in the AndroidManifest.xml file.
+add_library(${CMAKE_PROJECT_NAME} SHARED
+        # List C/C++ source files with relative paths to this CMakeLists.txt.
+        llama-android.cpp)
+
+# Specifies libraries CMake should link to your target library. You
+# can link libraries from various origins, such as libraries defined in this
+# build script, prebuilt third-party libraries, or Android system libraries.
+target_link_libraries(${CMAKE_PROJECT_NAME}
+        # List libraries link to the target library
+        llama
+        common
+        android
+        log)
--- a/llamalib/src/main/cpp/llama-android.cpp
+++ b/llamalib/src/main/cpp/llama-android.cpp
--- a/llamalib/src/main/cpp/llama-cpp/AUTHORS
+++ b/llamalib/src/main/cpp/llama-cpp/AUTHORS
--- a/llamalib/src/main/cpp/llama-cpp/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/CMakeLists.txt
+cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
+project("llama.cpp" C CXX)
+include(CheckIncludeFileCXX)
+
+#set(CMAKE_WARN_DEPRECATED YES)
+set(CMAKE_WARN_UNUSED_CLI YES)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
+    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
+endif()
+
+# Add path to modules
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
+
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+
+if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+    set(LLAMA_STANDALONE ON)
+
+    include(git-vars)
+
+    # configure project version
+    # TODO
+else()
+    set(LLAMA_STANDALONE OFF)
+endif()
+
+option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
+
+if (EMSCRIPTEN)
+    set(BUILD_SHARED_LIBS_DEFAULT OFF)
+
+    option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
+else()
+    if (MINGW)
+        set(BUILD_SHARED_LIBS_DEFAULT OFF)
+    else()
+        set(BUILD_SHARED_LIBS_DEFAULT ON)
+    endif()
+endif()
+
+option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
+
+if (WIN32)
+    add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
+endif()
+
+if (MSVC)
+    add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
+    add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
+    add_compile_options("$<$<COMPILE_LANGUAGE:C>:/bigobj>")
+    add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>")
+endif()
+
+#
+# option list
+#
+
+# debug
+option(LLAMA_ALL_WARNINGS           "llama: enable all compiler warnings"                   ON)
+option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
+
+# build
+option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF)
+
+# sanitizers
+option(LLAMA_SANITIZE_THREAD    "llama: enable thread sanitizer"    OFF)
+option(LLAMA_SANITIZE_ADDRESS   "llama: enable address sanitizer"   OFF)
+option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
+
+# utils
+option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE})
+
+# extra artifacts
+option(LLAMA_BUILD_TESTS    "llama: build tests"          ${LLAMA_STANDALONE})
+option(LLAMA_BUILD_EXAMPLES "llama: build examples"       ${LLAMA_STANDALONE})
+option(LLAMA_BUILD_SERVER   "llama: build server example" ${LLAMA_STANDALONE})
+
+# 3rd party libs
+option(LLAMA_CURL       "llama: use libcurl to download model from an URL" ON)
+option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
+
+# Required for relocatable CMake package
+include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
+include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
+
+# override ggml options
+set(GGML_ALL_WARNINGS   ${LLAMA_ALL_WARNINGS})
+set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
+
+# change the default for these ggml options
+if (NOT DEFINED GGML_LLAMAFILE)
+    set(GGML_LLAMAFILE_DEFAULT ON)
+endif()
+
+if (NOT DEFINED GGML_CUDA_GRAPHS)
+    set(GGML_CUDA_GRAPHS_DEFAULT ON)
+endif()
+
+# transition helpers
+# Deprecation shim: when the legacy option named by OLD is enabled, emit a message with
+# severity TYPE and switch on the replacement option named by NEW in the caller's scope.
+function (llama_option_depr TYPE OLD NEW)
+    # Nothing to do unless the legacy option is set to a true value.
+    if (NOT ${OLD})
+        return()
+    endif()
+
+    message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
+    set(${NEW} ON PARENT_SCOPE)
+endfunction()
+
+llama_option_depr(FATAL_ERROR LLAMA_CUBLAS              GGML_CUDA)
+llama_option_depr(WARNING     LLAMA_CUDA                GGML_CUDA)
+llama_option_depr(WARNING     LLAMA_KOMPUTE             GGML_KOMPUTE)
+llama_option_depr(WARNING     LLAMA_METAL               GGML_METAL)
+llama_option_depr(WARNING     LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
+llama_option_depr(WARNING     LLAMA_NATIVE              GGML_NATIVE)
+llama_option_depr(WARNING     LLAMA_RPC                 GGML_RPC)
+llama_option_depr(WARNING     LLAMA_SYCL                GGML_SYCL)
+llama_option_depr(WARNING     LLAMA_SYCL_F16            GGML_SYCL_F16)
+llama_option_depr(WARNING     LLAMA_CANN                GGML_CANN)
+
+if (NOT MSVC)
+    if (LLAMA_SANITIZE_THREAD)
+        message(STATUS "Using -fsanitize=thread")
+
+        add_compile_options(-fsanitize=thread)
+        link_libraries     (-fsanitize=thread)
+    endif()
+
+    if (LLAMA_SANITIZE_ADDRESS)
+        message(STATUS "Using -fsanitize=address")
+
+        add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
+        link_libraries     (-fsanitize=address)
+    endif()
+
+    if (LLAMA_SANITIZE_UNDEFINED)
+        message(STATUS "Using -fsanitize=undefined")
+
+        add_compile_options(-fsanitize=undefined)
+        link_libraries     (-fsanitize=undefined)
+    endif()
+endif()
+
+#
+# 3rd-party
+#
+
+if (LLAMA_USE_SYSTEM_GGML)
+    message(STATUS "Using system-provided libggml, skipping ggml build")
+    find_package(ggml REQUIRED)
+    add_library(ggml ALIAS ggml::ggml)
+endif()
+
+if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
+    add_subdirectory(ggml)
+    # ... otherwise assume ggml is added by a parent CMakeLists.txt
+endif()
+
+#
+# build the library
+#
+
+add_subdirectory(src)
+
+#
+# utils, programs, examples and tests
+#
+
+if (NOT LLAMA_BUILD_COMMON)
+    message(STATUS "LLAMA_BUILD_COMMON is OFF, disabling LLAMA_CURL")
+    set(LLAMA_CURL OFF)
+endif()
+
+if (LLAMA_BUILD_COMMON)
+    add_subdirectory(common)
+endif()
+
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+    include(CTest)
+    add_subdirectory(tests)
+endif()
+
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
+    add_subdirectory(examples)
+    add_subdirectory(pocs)
+endif()
+
+#
+# install
+#
+
+include(GNUInstallDirs)
+include(CMakePackageConfigHelpers)
+
+set(LLAMA_BUILD_NUMBER        ${BUILD_NUMBER})
+set(LLAMA_BUILD_COMMIT        ${BUILD_COMMIT})
+set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
+
+set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header  files")
+set(LLAMA_LIB_INSTALL_DIR     ${CMAKE_INSTALL_LIBDIR}     CACHE PATH "Location of library files")
+set(LLAMA_BIN_INSTALL_DIR     ${CMAKE_INSTALL_BINDIR}     CACHE PATH "Location of binary  files")
+
+set(LLAMA_PUBLIC_HEADERS
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h)
+
+set_target_properties(llama
+    PROPERTIES
+        PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
+
+install(TARGETS llama LIBRARY PUBLIC_HEADER)
+
+configure_package_config_file(
+        ${CMAKE_CURRENT_SOURCE_DIR}/cmake/llama-config.cmake.in
+        ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
+    INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama
+    PATH_VARS LLAMA_INCLUDE_INSTALL_DIR
+              LLAMA_LIB_INSTALL_DIR
+              LLAMA_BIN_INSTALL_DIR )
+
+write_basic_package_version_file(
+        ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake
+    VERSION ${LLAMA_INSTALL_VERSION}
+    COMPATIBILITY SameMajorVersion)
+
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
+              ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama)
+
+install(
+    FILES convert_hf_to_gguf.py
+    PERMISSIONS
+        OWNER_READ
+        OWNER_WRITE
+        OWNER_EXECUTE
+        GROUP_READ
+        GROUP_EXECUTE
+        WORLD_READ
+        WORLD_EXECUTE
+    DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+configure_file(cmake/llama.pc.in
+        "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
+        @ONLY)
+
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
+
+#
+# copy the license files
+#
+
+# Check if running in GitHub Actions
+if(DEFINED ENV{GITHUB_ACTIONS} AND "$ENV{GITHUB_ACTIONS}" STREQUAL "true")
+    message(STATUS "Running inside GitHub Actions - copying license files")
+
+    # Copy all files from licenses/ to build/bin/
+    file(GLOB LICENSE_FILES "${CMAKE_SOURCE_DIR}/licenses/*")
+    foreach(LICENSE_FILE ${LICENSE_FILES})
+        get_filename_component(FILENAME ${LICENSE_FILE} NAME)
+        configure_file(${LICENSE_FILE} "${CMAKE_BINARY_DIR}/bin/${FILENAME}" COPYONLY)
+    endforeach()
+endif()
+
--- a/llamalib/src/main/cpp/llama-cpp/CMakePresets.json
+++ b/llamalib/src/main/cpp/llama-cpp/CMakePresets.json
+{
+  "version": 4,
+  "configurePresets": [
+    {
+        "name":  "base",
+        "hidden": true,
+        "generator":   "Ninja",
+        "binaryDir":   "${sourceDir}/build-${presetName}",
+        "cacheVariables": {
+            "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+            "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+        }
+    },
+    {
+        "name": "sycl-base",
+        "hidden": true,
+        "generator": "Ninja",
+        "binaryDir": "${sourceDir}/build-${presetName}",
+        "cacheVariables": {
+            "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+            "CMAKE_CXX_COMPILER": "icx",
+            "CMAKE_C_COMPILER": "cl",
+            "GGML_SYCL": "ON",
+            "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+        }
+    },
+    { "name": "debug",    "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
+    { "name": "release",  "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
+    { "name": "reldbg",   "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
+    { "name": "static",   "hidden": true, "cacheVariables": { "GGML_STATIC":      "ON" } },
+    { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16":    "ON" } },
+    { "name": "vulkan",   "hidden": true, "cacheVariables": { "GGML_VULKAN":      "ON" } },
+
+    {
+        "name": "x64-windows-llvm", "hidden": true,
+        "cacheVariables": {
+            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake"
+        }
+    },
+
+    {
+        "name": "arm64-windows-msvc", "hidden": true,
+        "architecture": { "value": "arm64",    "strategy": "external" },
+        "toolset":      { "value": "host=x64", "strategy": "external" },
+        "cacheVariables": {
+            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake"
+        }
+    },
+
+    {
+        "name": "arm64-windows-llvm", "hidden": true,
+        "architecture": { "value": "arm64",    "strategy": "external" },
+        "toolset":      { "value": "host=x64", "strategy": "external" },
+        "cacheVariables": {
+            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
+        }
+    },
+
+    {
+        "name": "arm64-apple-clang", "hidden": true,
+        "architecture": { "value": "arm64",    "strategy": "external" },
+        "toolset":      { "value": "host=x64", "strategy": "external" },
+        "cacheVariables": {
+            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake"
+        }
+    },
+
+    { "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
+    { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
+    { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
+
+    { "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] },
+    { "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
+    { "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang",  "reldbg", "static" ] },
+
+    { "name": "arm64-windows-msvc-debug", "inherits": [ "base", "arm64-windows-msvc",  "debug"   ] },
+    { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc",  "reldbg" ] },
+    { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc",  "reldbg", "static" ] },
+
+    { "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] },
+    { "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] },
+    { "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] },
+    { "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] },
+
+    { "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] },
+    { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
+    { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
+
+    { "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] },
+    { "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] },
+    { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] },
+    { "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] },
+
+    { "name": "x64-windows-vulkan-debug", "inherits": [ "base", "vulkan", "debug" ] },
+    { "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] }
+  ]
+}
--- a/llamalib/src/main/cpp/llama-cpp/CODEOWNERS
+++ b/llamalib/src/main/cpp/llama-cpp/CODEOWNERS
+# collaborators can optionally add themselves here to indicate their availability for reviewing related PRs
+
+/ci/ @ggerganov
+/.devops/*.Dockerfile @ngxson
+/examples/server/ @ngxson
+/ggml/src/ggml-cuda/fattn* @JohannesGaessler
+/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
+/ggml/src/ggml-opt.cpp @JohannesGaessler
+/ggml/src/gguf.cpp @JohannesGaessler
--- a/llamalib/src/main/cpp/llama-cpp/CONTRIBUTING.md
+++ b/llamalib/src/main/cpp/llama-cpp/CONTRIBUTING.md
+# Pull requests (for contributors)
+
+- llama.cpp uses the ggml tensor library for model evaluation. If you are unfamiliar with ggml, consider taking a look at the [examples in the ggml repository](https://github.com/ggml-org/ggml/tree/master/examples/). [simple](https://github.com/ggml-org/ggml/tree/master/examples/simple) shows the bare minimum for using ggml. [gpt-2](https://github.com/ggml-org/ggml/tree/master/examples/gpt-2) has minimal implementations for language model inference using GPT-2. [mnist](https://github.com/ggml-org/ggml/tree/master/examples/mnist) demonstrates how to train and evaluate a simple image classifier
+- Test your changes:
+    - Execute [the full CI locally on your machine](ci/README.md) before publishing
+    - Verify that the perplexity and the performance are not affected negatively by your changes (use `llama-perplexity` and `llama-bench`)
+    - If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends)
+    - If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops`
+- Create separate PRs for each feature or fix. Avoid combining unrelated changes in a single PR
+- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
+- If your PR becomes stale, don't hesitate to ping the maintainers in the comments
+
+# Pull requests (for collaborators)
+
+- Squash-merge PRs
+- Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
+- Optionally pick a `<module>` from here: https://github.com/ggml-org/llama.cpp/wiki/Modules
+- Consider adding yourself to [CODEOWNERS](CODEOWNERS)
+
+# Coding guidelines
+
+- Avoid adding third-party dependencies, extra files, extra headers, etc.
+- Always consider cross-compatibility with other operating systems and architectures
+- Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
+- Vertical alignment makes things more readable and easier to batch edit
+- Clean up any trailing whitespace, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
+- Use sized integer types such as `int32_t` in the public API; `size_t` may also be appropriate, e.g. for allocation sizes or byte offsets
+- Declare structs with `struct foo {}` instead of `typedef struct foo {} foo`
+    - In C++ code omit optional `struct` and `enum` keyword whenever they are not necessary
+    ```cpp
+    // OK
+    llama_context * ctx;
+    const llama_rope_type rope_type;
+
+    // not OK
+    struct llama_context * ctx;
+    const enum llama_rope_type rope_type;
+    ```
+
+    _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline.)_
+
+- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` (from clang-tools v15+) to format the added code
+- For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines)
+- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
+- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggml-org/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
+
+![matmul](media/matmul.png)
+
+# Naming guidelines
+
+- Use `snake_case` for function, variable and type names
+- Naming usually optimizes for longest common prefix (see https://github.com/ggml-org/ggml/pull/302#discussion_r1243240963)
+
+    ```cpp
+    // not OK
+    int small_number;
+    int big_number;
+
+    // OK
+    int number_small;
+    int number_big;
+    ```
+
+- Enum values are always in upper case and prefixed with the enum name
+
+    ```cpp
+    enum llama_vocab_type {
+        LLAMA_VOCAB_TYPE_NONE = 0,
+        LLAMA_VOCAB_TYPE_SPM  = 1,
+        LLAMA_VOCAB_TYPE_BPE  = 2,
+        LLAMA_VOCAB_TYPE_WPM  = 3,
+        LLAMA_VOCAB_TYPE_UGM  = 4,
+        LLAMA_VOCAB_TYPE_RWKV = 5,
+    };
+    ```
+
+- The general naming pattern is `<class>_<method>`, with `<method>` being `<action>_<noun>`
+
+    ```cpp
+    llama_model_init();           // class: "llama_model",         method: "init"
+    llama_sampler_chain_remove(); // class: "llama_sampler_chain", method: "remove"
+    llama_sampler_get_seed();     // class: "llama_sampler",       method: "get_seed"
+    llama_set_embeddings();       // class: "llama_context",       method: "set_embeddings"
+    llama_n_threads();            // class: "llama_context",       method: "n_threads"
+    llama_adapter_lora_free();    // class: "llama_adapter_lora",  method: "free"
+    ```
+
+    - The `get` `<action>` can be omitted
+    - The `<noun>` can be omitted if not necessary
+    - The `_context` suffix of the `<class>` is optional. Use it to disambiguate symbols when needed
+    - Use `init`/`free` for constructor/destructor `<action>`
+
+- Use the `_t` suffix when a type is supposed to be opaque to the user - it's not relevant to them if it is a struct or anything else
+
+    ```cpp
+    typedef struct llama_context * llama_context_t;
+
+    enum llama_pooling_type llama_pooling_type(const llama_context_t ctx);
+    ```
+
+    _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline)_
+
+- C/C++ filenames are all lowercase with dashes. Headers use the `.h` extension. Source files use the `.c` or `.cpp` extension
+- Python filenames are all lowercase with underscores
+
+- _(TODO: abbreviations usage)_
+
+# Preprocessor directives
+
+- _(TODO: add guidelines with examples and apply them to the codebase)_
+
+    ```cpp
+    #ifdef FOO
+    #endif // FOO
+    ```
+
+# Documentation
+
+- Documentation is a community effort
+- When you need to look into the source code to figure out how to use an API consider adding a short summary to the header file for future reference
+- When you notice incorrect or outdated documentation, please update it
+
+# Resources
+
+The Github issues, PRs and discussions contain a lot of information that can be useful to get familiar with the codebase. For convenience, some of the more important information is referenced from Github projects:
+
+https://github.com/ggml-org/llama.cpp/projects
--- a/llamalib/src/main/cpp/llama-cpp/LICENSE
+++ b/llamalib/src/main/cpp/llama-cpp/LICENSE
+MIT License
+
+Copyright (c) 2023-2024 The ggml authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/llamalib/src/main/cpp/llama-cpp/Makefile
+++ b/llamalib/src/main/cpp/llama-cpp/Makefile
--- a/llamalib/src/main/cpp/llama-cpp/README.md
+++ b/llamalib/src/main/cpp/llama-cpp/README.md
--- a/llamalib/src/main/cpp/llama-cpp/SECURITY.md
+++ b/llamalib/src/main/cpp/llama-cpp/SECURITY.md
+# Security Policy
+
+ - [**Using llama.cpp securely**](#using-llamacpp-securely)
+   - [Untrusted models](#untrusted-models)
+   - [Untrusted inputs](#untrusted-inputs)
+   - [Data privacy](#data-privacy)
+   - [Untrusted environments or networks](#untrusted-environments-or-networks)
+   - [Multi-Tenant environments](#multi-tenant-environments)
+ - [**Reporting a vulnerability**](#reporting-a-vulnerability)
+
+## Using llama.cpp securely
+
+### Untrusted models
+Be careful when running untrusted models. This classification includes models created by unknown developers or utilizing data obtained from unknown sources.
+
+*Always execute untrusted models within a secure, isolated environment such as a sandbox* (e.g., containers, virtual machines). This helps protect your system from potentially malicious code.
+
+> [!NOTE]
+> The trustworthiness of a model is not binary. You must always determine the proper level of caution depending on the specific model and how it matches your use case and risk tolerance.
+
+### Untrusted inputs
+
+Some models accept various input formats (text, images, audio, etc.). The libraries converting these inputs have varying security levels, so it's crucial to isolate the model and carefully pre-process inputs to mitigate script injection risks.
+
+For maximum security when handling untrusted inputs, you may need to employ the following:
+
+* Sandboxing: Isolate the environment where the inference happens.
+* Pre-analysis: Check how the model performs by default when exposed to prompt injection (e.g. using [fuzzing for prompt injection](https://github.com/FonduAI/awesome-prompt-injection?tab=readme-ov-file#tools)). This will give you leads on how hard you will have to work on the next topics.
+* Updates: Keep both LLaMA C++ and your libraries updated with the latest security patches.
+* Input Sanitation: Before feeding data to the model, sanitize inputs rigorously. This involves techniques such as:
+    * Validation: Enforce strict rules on allowed characters and data types.
+    * Filtering: Remove potentially malicious scripts or code fragments.
+    * Encoding: Convert special characters into safe representations.
+    * Verification: Run tooling that identifies potential script injections (e.g. [models that detect prompt injection attempts](https://python.langchain.com/docs/guides/safety/hugging_face_prompt_injection)).
+
+### Data privacy
+
+To protect sensitive data from potential leaks or unauthorized access, it is crucial to sandbox the model execution. This means running the model in a secure, isolated environment, which helps mitigate many attack vectors.
+
+### Untrusted environments or networks
+
+If you can't run your models in a secure and isolated environment or if they must be exposed to an untrusted network, make sure to take the following security precautions:
+* Confirm the hash of any downloaded artifact (e.g. pre-trained model weights) matches a known-good value
+* Encrypt your data if sending it over the network.
+
+### Multi-Tenant environments
+
+If you intend to run multiple models in parallel with shared memory, it is your responsibility to ensure the models do not interact or access each other's data. The primary areas of concern are tenant isolation, resource allocation, model sharing and hardware attacks.
+
+1. Tenant Isolation: Models should run separately with strong isolation methods to prevent unwanted data access. Separating networks is crucial for isolation, as it prevents unauthorized access to data or models and malicious users from sending graphs to execute under another tenant's identity.
+
+2. Resource Allocation: A denial of service caused by one model can impact the overall system health. Implement safeguards like rate limits, access controls, and health monitoring.
+
+3. Model Sharing: In a multitenant model sharing design, tenants and users must understand the security risks of running code provided by others. Since there are no reliable methods to detect malicious models, sandboxing the model execution is the recommended approach to mitigate the risk.
+
+4. Hardware Attacks: GPUs or TPUs can also be attacked. [Research](https://scholar.google.com/scholar?q=gpu+side+channel) has shown that side channel attacks on GPUs are possible, which can make data leak from other models or processes running on the same system at the same time.
+
+## Reporting a vulnerability
+
+Beware that none of the topics under [Using llama.cpp securely](#using-llamacpp-securely) are considered vulnerabilities of LLaMA C++.
+
+<!-- normal version -->
+However, if you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released.
+
+Please disclose it as a private [security advisory](https://github.com/ggml-org/llama.cpp/security/advisories/new).
+
+This project is maintained by a team of volunteers on a reasonable-effort basis. As such, please give us at least 90 days to work on a fix before public exposure.
--- a/llamalib/src/main/cpp/llama-cpp/build-xcframework.sh
+++ b/llamalib/src/main/cpp/llama-cpp/build-xcframework.sh
--- a/llamalib/src/main/cpp/llama-cpp/ci/README.md
+++ b/llamalib/src/main/cpp/llama-cpp/ci/README.md
+# CI
+
+In addition to [Github Actions](https://github.com/ggml-org/llama.cpp/actions) `llama.cpp` uses a custom CI framework:
+
+https://github.com/ggml-org/ci
+
+It monitors the `master` branch for new commits and runs the
+[ci/run.sh](https://github.com/ggml-org/llama.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us
+to execute heavier workloads compared to just using Github Actions. Also with time, the cloud instances will be scaled
+to cover various hardware architectures, including GPU and Apple Silicon instances.
+
+Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message.
+Only the branches of this repo are monitored for this keyword.
+
+It is good practice to execute the full CI locally on your machine before publishing changes:
+
+```bash
+mkdir tmp
+
+# CPU-only build
+bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+# with CUDA support
+GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+# with SYCL support
+source /opt/intel/oneapi/setvars.sh
+GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+# with MUSA support
+GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+```
+
+## Running MUSA CI in a Docker Container
+
+Assuming `$PWD` is the root of the `llama.cpp` repository, follow these steps to set up and run MUSA CI in a Docker container:
+
+### 1. Create a local directory to store cached models, configuration files and venv:
+
+```bash
+mkdir -p $HOME/llama.cpp/ci-cache
+```
+
+### 2. Create a local directory to store CI run results:
+
+```bash
+mkdir -p $HOME/llama.cpp/ci-results
+```
+
+### 3. Start a Docker container and run the CI:
+
+```bash
+docker run --privileged -it \
+    -v $HOME/llama.cpp/ci-cache:/ci-cache \
+    -v $HOME/llama.cpp/ci-results:/ci-results \
+    -v $PWD:/ws -w /ws \
+    mthreads/musa:rc3.1.1-devel-ubuntu22.04
+```
+
+Inside the container, execute the following commands:
+
+```bash
+apt update -y && apt install -y bc cmake ccache git python3.10-venv time unzip wget
+git config --global --add safe.directory /ws
+GG_BUILD_MUSA=1 bash ./ci/run.sh /ci-results /ci-cache
+```
+
+This setup ensures that the CI runs within an isolated Docker environment while maintaining cached files and results across runs.
--- a/llamalib/src/main/cpp/llama-cpp/ci/run.sh
+++ b/llamalib/src/main/cpp/llama-cpp/ci/run.sh
--- a/llamalib/src/main/cpp/llama-cpp/cmake/arm64-apple-clang.cmake
+++ b/llamalib/src/main/cpp/llama-cpp/cmake/arm64-apple-clang.cmake
+set( CMAKE_SYSTEM_NAME Darwin )
+set( CMAKE_SYSTEM_PROCESSOR arm64 )
+
+set( target arm64-apple-darwin-macho )
+
+set( CMAKE_C_COMPILER    clang )
+set( CMAKE_CXX_COMPILER  clang++ )
+
+set( CMAKE_C_COMPILER_TARGET   ${target} )
+set( CMAKE_CXX_COMPILER_TARGET ${target} )
+
+set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
+set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function" )
+
+set( CMAKE_C_FLAGS_INIT   "${arch_c_flags} ${warn_c_flags}" )
+set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
--- a/llamalib/src/main/cpp/llama-cpp/cmake/arm64-windows-llvm.cmake
+++ b/llamalib/src/main/cpp/llama-cpp/cmake/arm64-windows-llvm.cmake
+set( CMAKE_SYSTEM_NAME Windows )
+set( CMAKE_SYSTEM_PROCESSOR arm64 )
+
+set( target arm64-pc-windows-msvc )
+
+set( CMAKE_C_COMPILER    clang )
+set( CMAKE_CXX_COMPILER  clang++ )
+
+set( CMAKE_C_COMPILER_TARGET   ${target} )
+set( CMAKE_CXX_COMPILER_TARGET ${target} )
+
+set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
+set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
+
+set( CMAKE_C_FLAGS_INIT   "${arch_c_flags} ${warn_c_flags}" )
+set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
--- a/llamalib/src/main/cpp/llama-cpp/cmake/arm64-windows-msvc.cmake
+++ b/llamalib/src/main/cpp/llama-cpp/cmake/arm64-windows-msvc.cmake
+set( CMAKE_SYSTEM_NAME Windows )
+set( CMAKE_SYSTEM_PROCESSOR arm64 )
+
+set( target arm64-pc-windows-msvc )
+set( CMAKE_C_COMPILER_TARGET   ${target} )
+set( CMAKE_CXX_COMPILER_TARGET ${target} )
--- a/llamalib/src/main/cpp/llama-cpp/cmake/build-info.cmake
+++ b/llamalib/src/main/cpp/llama-cpp/cmake/build-info.cmake
+set(BUILD_NUMBER 0)           # fallback; replaced by the commit count when git succeeds
+set(BUILD_COMMIT "unknown")   # fallback; replaced by the short HEAD hash when git succeeds
+set(BUILD_COMPILER "unknown") # fallback; replaced by the compiler description below
+set(BUILD_TARGET "unknown")   # fallback; replaced by the target description below
+
+# Look for git: try find_package(Git) first, then fall back to a plain program search
+find_package(Git)
+if(NOT Git_FOUND)
+    find_program(GIT_EXECUTABLE NAMES git git.exe)
+    if(GIT_EXECUTABLE)
+        set(Git_FOUND TRUE)
+        message(STATUS "Found Git: ${GIT_EXECUTABLE}")
+    else()
+        message(WARNING "Git not found. Build info will not be accurate.")
+    endif()
+endif()
+
+# Get the commit count and hash; each value is overwritten only when its git command exits with 0
+if(Git_FOUND)
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE HEAD
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        RESULT_VARIABLE RES
+    )
+    if (RES EQUAL 0)
+        set(BUILD_COMMIT ${HEAD})
+    endif()
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE COUNT
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        RESULT_VARIABLE RES
+    )
+    if (RES EQUAL 0)
+        set(BUILD_NUMBER ${COUNT})
+    endif()
+endif()
+
+if(MSVC)
+    set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
+    set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
+else()
+    execute_process(
+        COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER} # keeps only the first line of the compiler's --version output
+        OUTPUT_VARIABLE OUT
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    set(BUILD_COMPILER ${OUT}) # NOTE(review): no RESULT_VARIABLE check here, so a failed command overwrites the fallback with its (possibly empty) output
+    execute_process(
+        COMMAND ${CMAKE_C_COMPILER} -dumpmachine
+        OUTPUT_VARIABLE OUT
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    set(BUILD_TARGET ${OUT})
+endif()
--- a/llamalib/src/main/cpp/llama-cpp/cmake/common.cmake
+++ b/llamalib/src/main/cpp/llama-cpp/cmake/common.cmake
+include("ggml/cmake/common.cmake")
+
+function(llama_add_compile_flags) # adds warning-related compile options via add_compile_options(), driven by LLAMA_FATAL_WARNINGS and LLAMA_ALL_WARNINGS
+    if (LLAMA_FATAL_WARNINGS)
+        if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+            list(APPEND C_FLAGS   -Werror) # NOTE(review): only collected here; these lists reach add_compile_options() solely in the LLAMA_ALL_WARNINGS non-MSVC branch below
+            list(APPEND CXX_FLAGS -Werror)
+        elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+            add_compile_options(/WX) # applied immediately, independent of LLAMA_ALL_WARNINGS
+        endif()
+    endif()
+
+    if (LLAMA_ALL_WARNINGS)
+        if (NOT MSVC)
+            list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
+                                -Werror=implicit-int -Werror=implicit-function-declaration)
+
+            list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
+
+            list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) # warnings shared by C and C++
+
+            list(APPEND C_FLAGS   ${WARNING_FLAGS})
+            list(APPEND CXX_FLAGS ${WARNING_FLAGS})
+
+            ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) # defined outside this file; presumably fills GF_C_FLAGS / GF_CXX_FLAGS used below - TODO confirm
+
+            add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
+                                "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>") # generator expressions keep the C and C++ flag sets separate per source language
+        else()
+            # todo : msvc
+            set(C_FLAGS   "" PARENT_SCOPE) # sets C_FLAGS to an empty string in the caller's scope
+            set(CXX_FLAGS "" PARENT_SCOPE) # sets CXX_FLAGS to an empty string in the caller's scope
+        endif()
+    endif()
+endfunction()
--- a/llamalib/src/main/cpp/llama-cpp/cmake/git-vars.cmake
+++ b/llamalib/src/main/cpp/llama-cpp/cmake/git-vars.cmake
+find_package(Git) # NOTE(review): the result is not checked; GIT_EXECUTABLE is used below even if git was not found
+
+# GIT_SHA1: the commit's SHA1 abbreviated to 8 characters (the --match pattern presumably matches no tag, so --always falls back to the hash)
+execute_process(COMMAND
+    "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_SHA1
+    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# GIT_DATE: the author date (%ad) of the latest commit, formatted with --date=local
+execute_process(COMMAND
+    "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_DATE
+    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# GIT_COMMIT_SUBJECT: the subject line (%s) of the latest commit
+execute_process(COMMAND
+    "${GIT_EXECUTABLE}" log -1 --format=%s
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
+    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
--- a/llamalib/src/main/cpp/llama-cpp/cmake/llama-config.cmake.in
+++ b/llamalib/src/main/cpp/llama-cpp/cmake/llama-config.cmake.in
+# Package configuration template for llama.
+# Values recorded for the package at configure time.
+set(LLAMA_VERSION      @LLAMA_INSTALL_VERSION@)
+set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
+set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
+set(LLAMA_SHARED_LIB   @BUILD_SHARED_LIBS@)
+
+@PACKAGE_INIT@
+
+# Installed locations of headers, libraries and binaries.
+set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
+set_and_check(LLAMA_LIB_DIR     "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
+set_and_check(LLAMA_BIN_DIR     "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")
+
+# ggml is required; its package files are looked up next to the llama libraries.
+find_package(ggml REQUIRED HINTS ${LLAMA_LIB_DIR}/cmake)
+
+# Locate the llama library itself inside the installed library directory.
+find_library(llama_LIBRARY llama REQUIRED HINTS ${LLAMA_LIB_DIR} NO_CMAKE_FIND_ROOT_PATH)
+
+# Expose the library as the imported target `llama`.
+add_library(llama UNKNOWN IMPORTED)
+set_target_properties(llama PROPERTIES
+    IMPORTED_LOCATION                 "${llama_LIBRARY}"
+    IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+    INTERFACE_INCLUDE_DIRECTORIES     "${LLAMA_INCLUDE_DIR}"
+    INTERFACE_LINK_LIBRARIES          "ggml::ggml;ggml::ggml-base;"
+    INTERFACE_COMPILE_FEATURES        c_std_90
+    POSITION_INDEPENDENT_CODE         ON)
+
+check_required_components(Llama)
--- a/llamalib/src/main/cpp/llama-cpp/cmake/llama.pc.in
+++ b/llamalib/src/main/cpp/llama-cpp/cmake/llama.pc.in
+# pkg-config metadata template for the llama library.
+# Consumers link against ggml, ggml-base and llama (see Libs below).
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=@CMAKE_INSTALL_PREFIX@
+libdir=@CMAKE_INSTALL_FULL_LIBDIR@
+includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
+
+Name: llama
+Description: Port of Facebook's LLaMA model in C/C++
+Version: @LLAMA_INSTALL_VERSION@
+Libs: -L${libdir} -lggml -lggml-base -lllama
+Cflags: -I${includedir}
--- a/llamalib/src/main/cpp/llama-cpp/cmake/x64-windows-llvm.cmake
+++ b/llamalib/src/main/cpp/llama-cpp/cmake/x64-windows-llvm.cmake
+# Toolchain settings: Windows / x86_64 target compiled with clang and clang++.
+set(CMAKE_SYSTEM_NAME      Windows)
+set(CMAKE_SYSTEM_PROCESSOR x86_64)
+
+set(CMAKE_C_COMPILER   clang)
+set(CMAKE_CXX_COMPILER clang++)
+
+# The same architecture flags seed the initial C and C++ compiler flags.
+set(arch_c_flags "-march=native")
+
+set(CMAKE_C_FLAGS_INIT   "${arch_c_flags}")
+set(CMAKE_CXX_FLAGS_INIT "${arch_c_flags}")
+
--- a/llamalib/src/main/cpp/llama-cpp/common/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/common/CMakeLists.txt
+# common
+
+find_package(Threads REQUIRED)
+
+llama_add_compile_flags()
+
+# Build info header
+#
+# Locate the Git index file so it can be used as a dependency of the generated
+# build-info.cpp. GIT_INDEX is set to "" when no index can be found.
+
+if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
+    set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
+
+    # Is git submodule
+    # (.git is then a plain file containing "gitdir: <path>" rather than a directory)
+    if(NOT IS_DIRECTORY "${GIT_DIR}")
+        # Quote both the path and the file contents so that ';' characters
+        # are not interpreted as list separators.
+        file(READ "${GIT_DIR}" REAL_GIT_DIR_LINK)
+        string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR "${REAL_GIT_DIR_LINK}")
+        # IS_ABSOLUTE also recognises Windows drive-letter paths (C:/...),
+        # which a check for a leading "/" would treat as relative.
+        if (IS_ABSOLUTE "${REAL_GIT_DIR}")
+            set(GIT_DIR "${REAL_GIT_DIR}")
+        else()
+            set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
+        endif()
+    endif()
+
+    if(EXISTS "${GIT_DIR}/index")
+        set(GIT_INDEX "${GIT_DIR}/index")
+    else()
+        message(WARNING "Git index not found in git repository.")
+        set(GIT_INDEX "")
+    endif()
+else()
+    message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
+    set(GIT_INDEX "")
+endif()
+
+# Regenerate build-info.cpp by running build-info-gen-cpp.cmake in script mode
+# whenever the template or the Git index (if one was found) changes.
+add_custom_command(
+    OUTPUT  "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp"
+    COMMAND ${CMAKE_COMMAND}
+            -DMSVC=${MSVC}
+            -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
+            -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID}
+            -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
+            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+            -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
+    DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
+    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
+    COMMENT "Generating build details from Git"
+    VERBATIM
+)
+
+# Object library that contains only the generated build-info translation unit.
+set(TARGET build_info)
+add_library(${TARGET} OBJECT build-info.cpp)
+if (BUILD_SHARED_LIBS)
+    set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON)
+endif()
+
+# Static helper library `common`; sources are listed with their headers alongside.
+set(TARGET common)
+
+add_library(${TARGET} STATIC
+    arg.cpp                     arg.h
+    base64.hpp
+    chat.cpp                    chat.h
+    common.cpp                  common.h
+    console.cpp                 console.h
+    json-schema-to-grammar.cpp
+    json.hpp
+    llguidance.cpp
+    log.cpp                     log.h
+    minja/chat-template.hpp
+    minja/minja.hpp
+    ngram-cache.cpp             ngram-cache.h
+    sampling.cpp                sampling.h
+    speculative.cpp             speculative.h
+    )
+
+if (BUILD_SHARED_LIBS)
+    set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON)
+endif()
+
+# Extra libraries linked privately into common; optional features append to this list.
+set(LLAMA_COMMON_EXTRA_LIBS build_info)
+
+# Use curl to download model url
+if (LLAMA_CURL)
+    find_package(CURL)
+    if (NOT CURL_FOUND)
+        message(FATAL_ERROR "Could NOT find CURL. Hint: to disable this feature, set -DLLAMA_CURL=OFF")
+    endif()
+    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
+    include_directories(${CURL_INCLUDE_DIRS})
+    # Link what find_package(CURL) resolved instead of searching again for a
+    # library literally named "curl": that second lookup fails when the
+    # library carries another name (e.g. libcurl on Windows) and may pick a
+    # different installation than the headers found above.
+    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
+endif ()
+
+# Optional llguidance support: the sources are fetched and built with cargo as
+# an external project, then linked into common as an imported static library.
+if (LLAMA_LLGUIDANCE)
+    include(ExternalProject)
+    set(LLGUIDANCE_SRC  ${CMAKE_BINARY_DIR}/llguidance/source)
+    set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release)
+
+    # Library file name and additional system libraries depend on the platform
+    if (NOT WIN32)
+        set(LLGUIDANCE_LIB_NAME "libllguidance.a")
+        set(LLGUIDANCE_PLATFORM_LIBS "")
+    else()
+        set(LLGUIDANCE_LIB_NAME "llguidance.lib")
+        # Windows-specific libraries
+        set(LLGUIDANCE_PLATFORM_LIBS
+            ws2_32    # Windows Sockets API
+            userenv   # For GetUserProfileDirectoryW
+            ntdll     # For NT functions
+            bcrypt    # For BCryptGenRandom
+        )
+    endif()
+
+    # No configure, update or install step: cargo builds in the source tree.
+    ExternalProject_Add(llguidance_ext
+        PREFIX            ${CMAKE_BINARY_DIR}/llguidance
+        SOURCE_DIR        ${LLGUIDANCE_SRC}
+        GIT_REPOSITORY    https://github.com/guidance-ai/llguidance
+        # v0.7.10:
+        GIT_TAG           0309d2a6bf40abda35344a362edc71e06d5009f8
+        BUILD_IN_SOURCE   TRUE
+        UPDATE_COMMAND    ""
+        CONFIGURE_COMMAND ""
+        BUILD_COMMAND     cargo build --release
+        INSTALL_COMMAND   ""
+        BUILD_BYPRODUCTS  ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME} ${LLGUIDANCE_PATH}/llguidance.h
+    )
+    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE)
+
+    # Imported target wrapping the library produced by the external build
+    add_library(llguidance STATIC IMPORTED)
+    set_property(TARGET llguidance PROPERTY IMPORTED_LOCATION ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME})
+    add_dependencies(llguidance llguidance_ext)
+
+    target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH})
+    # Link llguidance and its platform libraries into the main target
+    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
+endif ()
+
+# Usage requirements and link dependencies of the common library
+target_include_directories(${TARGET} PUBLIC .)
+target_compile_features(${TARGET} PUBLIC cxx_std_17)
+target_link_libraries(${TARGET}
+    PRIVATE ${LLAMA_COMMON_EXTRA_LIBS}
+    PUBLIC  llama Threads::Threads)
--- a/llamalib/src/main/cpp/llama-cpp/common/arg.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/arg.cpp
--- a/llamalib/src/main/cpp/llama-cpp/common/arg.h
+++ b/llamalib/src/main/cpp/llama-cpp/common/arg.h
+#pragma once
+
+#include "common.h"
+
+#include <set>
+#include <string>
+#include <vector>
+
+//
+// CLI argument parsing
+//
+
+// One CLI option: the spellings it answers to, its help text and a handler.
+// Each constructor overload stores its callback in exactly one handler_* member;
+// the remaining handler_* members keep their nullptr default.
+struct common_arg {
+    std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
+    std::set<enum llama_example> excludes = {};
+    std::vector<const char *> args;
+    const char * value_hint   = nullptr; // help text or example for arg value
+    const char * value_hint_2 = nullptr; // for second arg value
+    const char * env          = nullptr;
+    std::string help;
+    bool is_sparam = false; // is current arg a sampling param?
+    void (*handler_void)   (common_params & params) = nullptr;
+    void (*handler_string) (common_params & params, const std::string &) = nullptr;
+    void (*handler_str_str)(common_params & params, const std::string &, const std::string &) = nullptr;
+    void (*handler_int)    (common_params & params, int) = nullptr;
+
+    // option with one value, delivered to the handler as a string
+    common_arg(const std::initializer_list<const char *> & names,
+               const char * hint,
+               const std::string & description,
+               void (*on_string)(common_params & params, const std::string &))
+        : args(names), value_hint(hint), help(description), handler_string(on_string) {}
+
+    // option with one value, delivered to the handler as an int
+    common_arg(const std::initializer_list<const char *> & names,
+               const char * hint,
+               const std::string & description,
+               void (*on_int)(common_params & params, int))
+        : args(names), value_hint(hint), help(description), handler_int(on_int) {}
+
+    // option without a value
+    common_arg(const std::initializer_list<const char *> & names,
+               const std::string & description,
+               void (*on_flag)(common_params & params))
+        : args(names), help(description), handler_void(on_flag) {}
+
+    // option with two values, both delivered to the handler as strings
+    common_arg(const std::initializer_list<const char *> & names,
+               const char * hint,
+               const char * hint_2,
+               const std::string & description,
+               void (*on_two_strings)(common_params & params, const std::string &, const std::string &))
+        : args(names), value_hint(hint), value_hint_2(hint_2), help(description), handler_str_str(on_two_strings) {}
+
+    // setters return a reference to common_arg; implementations live outside this header
+    common_arg & set_examples(std::initializer_list<enum llama_example> examples);
+    common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
+    common_arg & set_env(const char * env);
+    common_arg & set_sparam();
+
+    bool in_example(enum llama_example ex);
+    bool is_exclude(enum llama_example ex);
+    bool get_value_from_env(std::string & output);
+    bool has_value_from_env();
+    std::string to_string();
+};
+
+// Parser state: the example being parsed for, a reference to the parameters
+// object, the list of options and an optional usage-printing callback.
+struct common_params_context {
+    enum llama_example ex = LLAMA_EXAMPLE_COMMON;
+    common_params & params;
+    std::vector<common_arg> options;
+    void(*print_usage)(int, char **) = nullptr;
+
+    // binds the context to an existing common_params instance (held by reference)
+    common_params_context(common_params & bound_params)
+        : params(bound_params) {}
+};
+
+// parse input arguments from CLI
+// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
+// print_usage is optional and defaults to nullptr
+// NOTE(review): ex presumably selects which example's options are accepted - confirm in arg.cpp
+bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
+
+// function to be used by test-arg-parser
+// returns a common_params_context by value; print_usage is optional and defaults to nullptr
+common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
--- a/llamalib/src/main/cpp/llama-cpp/common/base64.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/base64.hpp
--- a/llamalib/src/main/cpp/llama-cpp/common/build-info.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/build-info.cpp
+// Build metadata definitions. This file is the output of
+// common/cmake/build-info-gen-cpp.cmake (template: build-info.cpp.in);
+// change the template rather than this file.
+int LLAMA_BUILD_NUMBER = 0;
+char const *LLAMA_COMMIT = "unknown";
+char const *LLAMA_COMPILER = "Android (10552028, +pgo, +bolt, +lto, -mlgo, based on r487747d) clang version 17.0.2 (https://android.googlesource.com/toolchain/llvm-project d9f89f4d16663d5012e5c09495f3b30ece3d2362)";
+char const *LLAMA_BUILD_TARGET = "x86_64-apple-darwin23.5.0";
--- a/llamalib/src/main/cpp/llama-cpp/common/build-info.cpp.in
+++ b/llamalib/src/main/cpp/llama-cpp/common/build-info.cpp.in
+// Build metadata definitions. The placeholder values below are filled in by
+// configure_file() in common/cmake/build-info-gen-cpp.cmake, which writes
+// the result to common/build-info.cpp.
+int LLAMA_BUILD_NUMBER = @BUILD_NUMBER@;
+char const *LLAMA_COMMIT = "@BUILD_COMMIT@";
+char const *LLAMA_COMPILER = "@BUILD_COMPILER@";
+char const *LLAMA_BUILD_TARGET = "@BUILD_TARGET@";
--- a/llamalib/src/main/cpp/llama-cpp/common/chat.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/chat.cpp
--- a/llamalib/src/main/cpp/llama-cpp/common/chat.h
+++ b/llamalib/src/main/cpp/llama-cpp/common/chat.h
+// Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers.
+
+#pragma once
+
+#include "common.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+struct common_chat_templates;
+
+// One tool call carried by a chat message (element type of common_chat_msg::tool_calls).
+struct common_chat_tool_call {
+    std::string name;
+    std::string arguments; // NOTE(review): presumably JSON-encoded arguments - confirm in chat.cpp
+    std::string id;
+};
+
+// One typed piece of message content (element type of common_chat_msg::content_parts).
+struct common_chat_msg_content_part {
+    std::string type;
+    std::string text;
+};
+
+// A single chat message. Used as template input (common_chat_templates_inputs::messages)
+// and as the return type of common_chat_parse.
+struct common_chat_msg {
+    std::string role;
+    std::string content;
+    std::vector<common_chat_msg_content_part> content_parts = {};
+    std::vector<common_chat_tool_call> tool_calls = {};
+    std::string reasoning_content;
+    std::string tool_name;
+    std::string tool_call_id;
+};
+
+// Description of one tool (element type of common_chat_templates_inputs::tools).
+struct common_chat_tool {
+    std::string name;
+    std::string description;
+    std::string parameters; // NOTE(review): presumably a JSON schema serialized as text - confirm in chat.cpp
+};
+
+// Tool-choice setting stored in common_chat_templates_inputs::tool_choice (default: AUTO);
+// common_chat_tool_choice_parse_oaicompat returns one of these values for a given string.
+enum common_chat_tool_choice {
+    COMMON_CHAT_TOOL_CHOICE_AUTO,
+    COMMON_CHAT_TOOL_CHOICE_REQUIRED,
+    COMMON_CHAT_TOOL_CHOICE_NONE,
+};
+
+// Identifier of a chat format. Stored in common_chat_params::format and passed
+// to common_chat_format_name and common_chat_parse.
+enum common_chat_format {
+    COMMON_CHAT_FORMAT_CONTENT_ONLY,
+    COMMON_CHAT_FORMAT_GENERIC,
+    COMMON_CHAT_FORMAT_MISTRAL_NEMO,
+    COMMON_CHAT_FORMAT_LLAMA_3_X,
+    COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
+    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
+    COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING,
+    COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
+    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
+    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
+    COMMON_CHAT_FORMAT_HERMES_2_PRO,
+    COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
+    COMMON_CHAT_FORMAT_COMMAND_R7B,
+    COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
+
+    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
+};
+
+// Input bundle accepted by common_chat_templates_apply.
+struct common_chat_templates_inputs {
+    std::vector<common_chat_msg> messages;
+    std::string grammar;
+    std::string json_schema;
+    bool add_generation_prompt = true;
+    bool use_jinja = true;
+    // Parameters below only supported when use_jinja is true
+    std::vector<common_chat_tool> tools;
+    common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
+    bool parallel_tool_calls = false;
+    bool extract_reasoning     = true;
+};
+
+// Result type of common_chat_templates_apply. Every member has a default:
+// format starts as CONTENT_ONLY, grammar_lazy as false, the rest empty.
+struct common_chat_params {
+    common_chat_format                  format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+    std::string                         prompt;
+    std::string                         grammar;
+    bool                                grammar_lazy = false;
+    std::vector<common_grammar_trigger> grammar_triggers;
+    std::vector<std::string>            preserved_tokens;
+    std::vector<std::string>            additional_stops;
+};
+
+// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
+bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
+
+void common_chat_templates_free(struct common_chat_templates * tmpls);
+
+// Deleter that forwards the pointer to common_chat_templates_free
+struct common_chat_templates_deleter {
+    void operator()(common_chat_templates * tmpls) {
+        common_chat_templates_free(tmpls);
+    }
+};
+
+// Owning pointer to common_chat_templates, released through the deleter above
+using common_chat_templates_ptr = std::unique_ptr<struct common_chat_templates, common_chat_templates_deleter>;
+
+// Returns an owning common_chat_templates_ptr; the bos/eos token overrides default to empty strings.
+// NOTE(review): presumably an empty override means "use the model's own value" - confirm in chat.cpp
+common_chat_templates_ptr common_chat_templates_init(
+                                    const struct llama_model * model,
+                                           const std::string & chat_template_override,
+                                           const std::string & bos_token_override = "",
+                                           const std::string & eos_token_override = "");
+
+bool         common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
+// variant is optional and defaults to nullptr
+const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant = nullptr);
+
+
+// Takes a templates object plus a common_chat_templates_inputs bundle and returns common_chat_params by value
+struct common_chat_params      common_chat_templates_apply(
+    const struct common_chat_templates * tmpls,
+    const struct common_chat_templates_inputs & inputs);
+
+// Format single message, while taking into account the position of that message in chat history
+std::string common_chat_format_single(
+        const struct common_chat_templates * tmpls,
+        const std::vector<common_chat_msg> & past_msg,
+        const common_chat_msg & new_msg,
+        bool add_ass,
+        bool use_jinja);
+
+// Returns an example of formatted chat
+std::string common_chat_format_example(
+    const struct common_chat_templates * tmpls,
+    bool use_jinja);
+
+// Conversions involving common_chat_format: format -> name string, and raw input text -> common_chat_msg
+std::string               common_chat_format_name(common_chat_format format);
+common_chat_msg           common_chat_parse(      const std::string & input, common_chat_format format);
+
+// Maps a tool-choice string to a common_chat_tool_choice value
+common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
+
+// Parses a JSON array of messages in OpenAI's chat completion API format.
+// T can be std::string containing JSON or nlohmann::ordered_json
+template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
+template <class T> T common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);
+
+// Parses a JSON array of tools in OpenAI's chat completion tool call API format.
+// T can be std::string containing JSON or nlohmann::ordered_json
+template <class T> std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const T & tools);
+template <class T> T common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);
--- a/llamalib/src/main/cpp/llama-cpp/common/cmake/build-info-gen-cpp.cmake
+++ b/llamalib/src/main/cpp/llama-cpp/common/cmake/build-info-gen-cpp.cmake
+# Script that (re)generates common/build-info.cpp from its template.
+# All paths are resolved relative to CMAKE_CURRENT_SOURCE_DIR.
+# NOTE(review): BUILD_COMMIT / BUILD_COMPILER / BUILD_TARGET are presumably
+# defined by the included build-info.cmake - confirm there.
+include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
+
+set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp.in")
+set(OUTPUT_FILE   "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp")
+
+# Only write the build info if it changed
+if(EXISTS ${OUTPUT_FILE})
+    file(READ ${OUTPUT_FILE} CONTENTS)
+    # CONTENTS must be passed quoted: an unquoted expansion is split into list
+    # elements at every ';' and the pieces are concatenated without it, so the
+    # patterns below (which end in ';') would never match and the file would
+    # be rewritten on every run.
+    string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ "${CONTENTS}")
+    set(OLD_COMMIT ${CMAKE_MATCH_1})
+    string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ "${CONTENTS}")
+    set(OLD_COMPILER ${CMAKE_MATCH_1})
+    string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ "${CONTENTS}")
+    set(OLD_TARGET ${CMAKE_MATCH_1})
+    # Regenerate as soon as any of the recorded values differs from the current one
+    if (
+        NOT OLD_COMMIT   STREQUAL BUILD_COMMIT   OR
+        NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR
+        NOT OLD_TARGET   STREQUAL BUILD_TARGET
+    )
+        configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
+    endif()
+else()
+    configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
+endif()
--- a/llamalib/src/main/cpp/llama-cpp/common/common.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/common.cpp
--- a/llamalib/src/main/cpp/llama-cpp/common/common.h
+++ b/llamalib/src/main/cpp/llama-cpp/common/common.h
--- a/llamalib/src/main/cpp/llama-cpp/common/console.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/console.cpp
--- a/llamalib/src/main/cpp/llama-cpp/common/console.h
+++ b/llamalib/src/main/cpp/llama-cpp/common/console.h
--- a/llamalib/src/main/cpp/llama-cpp/common/json-schema-to-grammar.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/json-schema-to-grammar.cpp
--- a/llamalib/src/main/cpp/llama-cpp/common/json-schema-to-grammar.h
+++ b/llamalib/src/main/cpp/llama-cpp/common/json-schema-to-grammar.h
--- a/llamalib/src/main/cpp/llama-cpp/common/json.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/json.hpp
--- a/llamalib/src/main/cpp/llama-cpp/common/llguidance.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/llguidance.cpp
--- a/llamalib/src/main/cpp/llama-cpp/common/log.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/log.cpp
--- a/llamalib/src/main/cpp/llama-cpp/common/log.h
+++ b/llamalib/src/main/cpp/llama-cpp/common/log.h
--- a/llamalib/src/main/cpp/llama-cpp/common/minja/chat-template.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/minja/chat-template.hpp
--- a/llamalib/src/main/cpp/llama-cpp/common/minja/minja.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/minja/minja.hpp
--- a/llamalib/src/main/cpp/llama-cpp/common/ngram-cache.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/ngram-cache.cpp
--- a/llamalib/src/main/cpp/llama-cpp/common/ngram-cache.h
+++ b/llamalib/src/main/cpp/llama-cpp/common/ngram-cache.h
--- a/llamalib/src/main/cpp/llama-cpp/common/sampling.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/sampling.cpp
--- a/llamalib/src/main/cpp/llama-cpp/common/sampling.h
+++ b/llamalib/src/main/cpp/llama-cpp/common/sampling.h
--- a/llamalib/src/main/cpp/llama-cpp/common/speculative.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/common/speculative.cpp
--- a/llamalib/src/main/cpp/llama-cpp/common/speculative.h
+++ b/llamalib/src/main/cpp/llama-cpp/common/speculative.h
--- a/llamalib/src/main/cpp/llama-cpp/common/stb_image.h
+++ b/llamalib/src/main/cpp/llama-cpp/common/stb_image.h
--- a/llamalib/src/main/cpp/llama-cpp/convert_hf_to_gguf.py
+++ b/llamalib/src/main/cpp/llama-cpp/convert_hf_to_gguf.py
--- a/llamalib/src/main/cpp/llama-cpp/convert_hf_to_gguf_update.py
+++ b/llamalib/src/main/cpp/llama-cpp/convert_hf_to_gguf_update.py
--- a/llamalib/src/main/cpp/llama-cpp/convert_llama_ggml_to_gguf.py
+++ b/llamalib/src/main/cpp/llama-cpp/convert_llama_ggml_to_gguf.py
--- a/llamalib/src/main/cpp/llama-cpp/convert_lora_to_gguf.py
+++ b/llamalib/src/main/cpp/llama-cpp/convert_lora_to_gguf.py
--- a/llamalib/src/main/cpp/llama-cpp/docs/android.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/android.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/backend/BLIS.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/backend/BLIS.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/backend/CANN.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/backend/CANN.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/backend/CUDA-FEDORA.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/backend/CUDA-FEDORA.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/backend/OPENCL.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/backend/OPENCL.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/backend/SYCL.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/backend/SYCL.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/build.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/build.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/development/HOWTO-add-model.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/development/HOWTO-add-model.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/development/debugging-tests.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/development/debugging-tests.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/development/llama-star/idea-arch.key
+++ b/llamalib/src/main/cpp/llama-cpp/docs/development/llama-star/idea-arch.key
--- a/llamalib/src/main/cpp/llama-cpp/docs/development/llama-star/idea-arch.pdf
+++ b/llamalib/src/main/cpp/llama-cpp/docs/development/llama-star/idea-arch.pdf
--- a/llamalib/src/main/cpp/llama-cpp/docs/development/token_generation_performance_tips.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/development/token_generation_performance_tips.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/docker.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/docker.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/function-calling.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/function-calling.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/install.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/install.md
--- a/llamalib/src/main/cpp/llama-cpp/docs/llguidance.md
+++ b/llamalib/src/main/cpp/llama-cpp/docs/llguidance.md
--- a/llamalib/src/main/cpp/llama-cpp/flake.lock
+++ b/llamalib/src/main/cpp/llama-cpp/flake.lock
--- a/llamalib/src/main/cpp/llama-cpp/flake.nix
+++ b/llamalib/src/main/cpp/llama-cpp/flake.nix
--- a/llamalib/src/main/cpp/llama-cpp/ggml/.gitignore
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/.gitignore
--- a/llamalib/src/main/cpp/llama-cpp/ggml/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/cmake/GitVars.cmake
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/cmake/GitVars.cmake
--- a/llamalib/src/main/cpp/llama-cpp/ggml/cmake/common.cmake
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/cmake/common.cmake
--- a/llamalib/src/main/cpp/llama-cpp/ggml/cmake/ggml-config.cmake.in
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/cmake/ggml-config.cmake.in
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-alloc.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-alloc.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-backend.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-backend.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-blas.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-blas.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-cann.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-cann.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-cpp.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-cpp.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-cpu.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-cpu.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-cuda.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-cuda.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-kompute.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-kompute.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-metal.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-metal.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-opencl.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-opencl.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-opt.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-opt.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-rpc.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-rpc.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-sycl.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-sycl.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-vulkan.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml-vulkan.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/ggml.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/include/gguf.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/include/gguf.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-alloc.c
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-alloc.c
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-backend-impl.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-backend-impl.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-backend-reg.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-backend-reg.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-backend.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-backend.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-blas/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-blas/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-blas/ggml-blas.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-blas/ggml-blas.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/Doxyfile
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/Doxyfile
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/acl_tensor.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/acl_tensor.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/acl_tensor.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/acl_tensor.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/aclnn_ops.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/aclnn_ops.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/aclnn_ops.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/aclnn_ops.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/common.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/common.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/ggml-cann.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cann/ggml-cann.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-common.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-common.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/amx/amx.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/amx/amx.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/amx/amx.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/amx/amx.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/amx/common.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/amx/common.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/amx/mmq.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/amx/mmq.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/amx/mmq.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/amx/mmq.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/binary-ops.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/binary-ops.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/binary-ops.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/binary-ops.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/common.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/common.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu.c
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ggml-cpu.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/kleidiai/kernels.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/kleidiai/kernels.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/llamafile/sgemm.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/llamafile/sgemm.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ops.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ops.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ops.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/ops.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/simd-mappings.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/simd-mappings.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/unary-ops.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/unary-ops.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/unary-ops.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/unary-ops.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/vec.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/vec.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/vec.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cpu/vec.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/acc.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/acc.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/acc.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/acc.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/arange.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/arange.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/arange.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/arange.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/argmax.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/argmax.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/argmax.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/argmax.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/argsort.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/argsort.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/argsort.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/argsort.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/binbcast.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/binbcast.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/binbcast.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/binbcast.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/clamp.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/clamp.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/clamp.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/clamp.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/common.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/common.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/concat.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/concat.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/concat.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/concat.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/convert.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/convert.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/convert.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/convert.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/count-equal.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/count-equal.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/count-equal.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/count-equal.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/cp-async.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/cp-async.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/cpy.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/cpy.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/cpy.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/cpy.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/cross-entropy-loss.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/cross-entropy-loss.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/dequantize.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/dequantize.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/diagmask.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/diagmask.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/diagmask.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/diagmask.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-common.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-common.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/fattn.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/getrows.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/getrows.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/getrows.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/getrows.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/ggml-cuda.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/gla.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/gla.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/gla.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/gla.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/im2col.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/im2col.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/im2col.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/im2col.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mma.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mma.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mmq.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mmq.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mmq.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mmq.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mmv.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mmv.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mmv.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mmv.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mmvq.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mmvq.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mmvq.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/mmvq.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/norm.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/norm.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/norm.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/norm.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/opt-step-adamw.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/opt-step-adamw.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/opt-step-adamw.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/opt-step-adamw.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/out-prod.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/out-prod.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/out-prod.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/out-prod.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/pad.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/pad.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/pad.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/pad.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/pool2d.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/pool2d.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/pool2d.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/pool2d.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/quantize.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/quantize.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/quantize.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/quantize.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/rope.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/rope.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/rope.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/rope.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/scale.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/scale.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/scale.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/scale.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/softmax.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/softmax.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/softmax.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/softmax.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/ssm-conv.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/ssm-conv.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/ssm-conv.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/ssm-conv.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/ssm-scan.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/ssm-scan.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/ssm-scan.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/ssm-scan.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/sum.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/sum.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/sum.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/sum.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/sumrows.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/sumrows.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/sumrows.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/sumrows.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/tsembd.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/tsembd.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/tsembd.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/tsembd.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/unary.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/unary.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/unary.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/unary.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/upscale.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/upscale.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/upscale.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/upscale.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/vecdotq.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/vecdotq.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/vendors/cuda.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/vendors/cuda.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/vendors/hip.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/vendors/hip.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/vendors/musa.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/vendors/musa.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/wkv.cu
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/wkv.cu
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/wkv.cuh
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-cuda/wkv.cuh
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-hip/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-hip/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-impl.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-impl.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/ggml-kompute.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/ggml-kompute.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-metal/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-metal/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-metal/ggml-metal-impl.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-metal/ggml-metal-impl.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-metal/ggml-metal.m
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-metal/ggml-metal.m
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-metal/ggml-metal.metal
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-metal/ggml-metal.metal
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-musa/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-musa/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/ggml-opencl.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/ggml-opencl.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl.cl
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl.cl
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_cvt.cl
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_cvt.cl
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_gemv_noshuffle.cl
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_gemv_noshuffle.cl
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_gemv_noshuffle_general.cl
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_gemv_noshuffle_general.cl
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_im2col.cl
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_im2col.cl
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_mm.cl
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_mm.cl
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_mul_mat_Ab_Bi_8x4.cl
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_mul_mat_Ab_Bi_8x4.cl
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_transpose_16.cl
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_transpose_16.cl
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_transpose_32.cl
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_transpose_32.cl
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_transpose_32_16.cl
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opencl/kernels/ggml-opencl_transpose_32_16.cl
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opt.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-opt.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-quants.c
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-quants.c
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-quants.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-quants.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-rpc/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-rpc/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-rpc/ggml-rpc.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-rpc/ggml-rpc.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/backend.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/backend.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/common.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/common.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/common.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/common.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/concat.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/concat.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/concat.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/concat.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/conv.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/conv.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/conv.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/conv.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/convert.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/convert.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/convert.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/convert.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/cpy.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/cpy.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/cpy.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/cpy.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/dequantize.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/dequantize.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/dmmv.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/dmmv.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/dmmv.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/dmmv.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/dpct/helper.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/dpct/helper.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/element_wise.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/element_wise.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/element_wise.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/element_wise.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/gemm.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/gemm.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/getrows.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/getrows.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/getrows.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/getrows.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/ggml-sycl.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/gla.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/gla.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/gla.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/gla.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/im2col.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/im2col.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/im2col.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/im2col.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/mmq.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/mmq.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/mmq.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/mmq.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/mmvq.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/mmvq.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/mmvq.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/mmvq.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/norm.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/norm.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/norm.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/norm.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/outprod.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/outprod.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/outprod.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/outprod.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/presets.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/presets.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/rope.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/rope.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/rope.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/rope.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/softmax.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/softmax.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/softmax.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/softmax.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/sycl_hw.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/sycl_hw.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/sycl_hw.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/sycl_hw.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/tsembd.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/tsembd.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/tsembd.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/tsembd.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/vecdotq.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/vecdotq.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/wkv.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/wkv.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/wkv.hpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-sycl/wkv.hpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-threading.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-threading.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-threading.h
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-threading.h
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/types.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/types.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml.c
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/ggml.c
--- a/llamalib/src/main/cpp/llama-cpp/ggml/src/gguf.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/ggml/src/gguf.cpp
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/LICENSE
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/LICENSE
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/README.md
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/README.md
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/examples/reader.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/examples/reader.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/examples/writer.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/examples/writer.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/__init__.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/__init__.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/constants.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/constants.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/gguf.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/gguf.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/gguf_reader.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/gguf_reader.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/gguf_writer.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/gguf_writer.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/lazy.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/lazy.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/metadata.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/metadata.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/py.typed
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/py.typed
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/quants.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/quants.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/scripts/__init__.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/scripts/__init__.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/scripts/gguf_convert_endian.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/scripts/gguf_convert_endian.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/scripts/gguf_dump.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/scripts/gguf_dump.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/scripts/gguf_hash.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/scripts/gguf_hash.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/scripts/gguf_new_metadata.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/scripts/gguf_new_metadata.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/scripts/gguf_set_metadata.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/scripts/gguf_set_metadata.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/tensor_mapping.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/tensor_mapping.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/utility.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/utility.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/vocab.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/gguf/vocab.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/pyproject.toml
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/pyproject.toml
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/tests/__init__.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/tests/__init__.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/tests/test_metadata.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/tests/test_metadata.py
--- a/llamalib/src/main/cpp/llama-cpp/gguf-py/tests/test_quants.py
+++ b/llamalib/src/main/cpp/llama-cpp/gguf-py/tests/test_quants.py
--- a/llamalib/src/main/cpp/llama-cpp/grammars/README.md
+++ b/llamalib/src/main/cpp/llama-cpp/grammars/README.md
--- a/llamalib/src/main/cpp/llama-cpp/grammars/arithmetic.gbnf
+++ b/llamalib/src/main/cpp/llama-cpp/grammars/arithmetic.gbnf
--- a/llamalib/src/main/cpp/llama-cpp/grammars/c.gbnf
+++ b/llamalib/src/main/cpp/llama-cpp/grammars/c.gbnf
--- a/llamalib/src/main/cpp/llama-cpp/grammars/chess.gbnf
+++ b/llamalib/src/main/cpp/llama-cpp/grammars/chess.gbnf
--- a/llamalib/src/main/cpp/llama-cpp/grammars/english.gbnf
+++ b/llamalib/src/main/cpp/llama-cpp/grammars/english.gbnf
--- a/llamalib/src/main/cpp/llama-cpp/grammars/japanese.gbnf
+++ b/llamalib/src/main/cpp/llama-cpp/grammars/japanese.gbnf
--- a/llamalib/src/main/cpp/llama-cpp/grammars/json.gbnf
+++ b/llamalib/src/main/cpp/llama-cpp/grammars/json.gbnf
--- a/llamalib/src/main/cpp/llama-cpp/grammars/json_arr.gbnf
+++ b/llamalib/src/main/cpp/llama-cpp/grammars/json_arr.gbnf
--- a/llamalib/src/main/cpp/llama-cpp/grammars/list.gbnf
+++ b/llamalib/src/main/cpp/llama-cpp/grammars/list.gbnf
--- a/llamalib/src/main/cpp/llama-cpp/include/llama-cpp.h
+++ b/llamalib/src/main/cpp/llama-cpp/include/llama-cpp.h
--- a/llamalib/src/main/cpp/llama-cpp/include/llama.h
+++ b/llamalib/src/main/cpp/llama-cpp/include/llama.h
--- a/llamalib/src/main/cpp/llama-cpp/licenses/LICENSE-curl
+++ b/llamalib/src/main/cpp/llama-cpp/licenses/LICENSE-curl
--- a/llamalib/src/main/cpp/llama-cpp/licenses/LICENSE-httplib
+++ b/llamalib/src/main/cpp/llama-cpp/licenses/LICENSE-httplib
--- a/llamalib/src/main/cpp/llama-cpp/licenses/LICENSE-jsonhpp
+++ b/llamalib/src/main/cpp/llama-cpp/licenses/LICENSE-jsonhpp
--- a/llamalib/src/main/cpp/llama-cpp/licenses/LICENSE-linenoise
+++ b/llamalib/src/main/cpp/llama-cpp/licenses/LICENSE-linenoise
--- a/llamalib/src/main/cpp/llama-cpp/media/llama0-banner.png
+++ b/llamalib/src/main/cpp/llama-cpp/media/llama0-banner.png
--- a/llamalib/src/main/cpp/llama-cpp/media/llama0-logo.png
+++ b/llamalib/src/main/cpp/llama-cpp/media/llama0-logo.png
--- a/llamalib/src/main/cpp/llama-cpp/media/llama1-banner.png
+++ b/llamalib/src/main/cpp/llama-cpp/media/llama1-banner.png
--- a/llamalib/src/main/cpp/llama-cpp/media/llama1-logo.png
+++ b/llamalib/src/main/cpp/llama-cpp/media/llama1-logo.png
--- a/llamalib/src/main/cpp/llama-cpp/media/llama1-logo.svg
+++ b/llamalib/src/main/cpp/llama-cpp/media/llama1-logo.svg
--- a/llamalib/src/main/cpp/llama-cpp/media/matmul.png
+++ b/llamalib/src/main/cpp/llama-cpp/media/matmul.png
--- a/llamalib/src/main/cpp/llama-cpp/media/matmul.svg
+++ b/llamalib/src/main/cpp/llama-cpp/media/matmul.svg
--- a/llamalib/src/main/cpp/llama-cpp/models/.editorconfig
+++ b/llamalib/src/main/cpp/llama-cpp/models/.editorconfig
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-aquila.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-aquila.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-baichuan.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-baichuan.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-bert-bge.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-bert-bge.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-bert-bge.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-bert-bge.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-bert-bge.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-bert-bge.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-chameleon.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-chameleon.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-chameleon.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-chameleon.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-command-r.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-command-r.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-command-r.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-command-r.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-command-r.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-command-r.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-coder.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-coder.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-coder.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-coder.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-coder.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-coder.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-llm.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-llm.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-llm.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-llm.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-llm.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-llm.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-falcon.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-falcon.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-falcon.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-falcon.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-falcon.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-falcon.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-gpt-2.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-gpt-2.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-gpt-2.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-gpt-2.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-gpt-2.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-gpt-2.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-gpt-4o.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-gpt-4o.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-gpt-4o.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-gpt-4o.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-gpt-neox.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-gpt-neox.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama-bpe.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama-bpe.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama-bpe.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama-bpe.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama-bpe.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama-bpe.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama-spm.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama-spm.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama-spm.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama-spm.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama-spm.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama-spm.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama4.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama4.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama4.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-llama4.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-mpt.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-mpt.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-mpt.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-mpt.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-mpt.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-mpt.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-phi-3.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-phi-3.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-phi-3.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-phi-3.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-phi-3.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-phi-3.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-qwen2.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-qwen2.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-qwen2.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-qwen2.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-qwen2.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-qwen2.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-refact.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-refact.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-refact.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-refact.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-refact.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-refact.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-roberta-bpe.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-roberta-bpe.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-roberta-bpe.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-roberta-bpe.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-starcoder.gguf
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-starcoder.gguf
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-starcoder.gguf.inp
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-starcoder.gguf.inp
--- a/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-starcoder.gguf.out
+++ b/llamalib/src/main/cpp/llama-cpp/models/ggml-vocab-starcoder.gguf.out
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/README.md
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/README.md
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/google-gemma-2-2b-it.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/google-gemma-2-2b-it.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/llama-cpp-deepseek-r1.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/llama-cpp-deepseek-r1.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/meetkai-functionary-medium-v3.1.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/meetkai-functionary-medium-v3.1.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/meetkai-functionary-medium-v3.2.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/meetkai-functionary-medium-v3.2.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/microsoft-Phi-3.5-mini-instruct.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/microsoft-Phi-3.5-mini-instruct.jinja
--- a/llamalib/src/main/cpp/llama-cpp/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja
+++ b/llamalib/src/main/cpp/llama-cpp/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja
--- a/llamalib/src/main/cpp/llama-cpp/mypy.ini
+++ b/llamalib/src/main/cpp/llama-cpp/mypy.ini
--- a/llamalib/src/main/cpp/llama-cpp/pocs/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/pocs/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/pocs/vdot/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/pocs/vdot/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/pocs/vdot/q8dot.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/pocs/vdot/q8dot.cpp
--- a/llamalib/src/main/cpp/llama-cpp/pocs/vdot/vdot.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/pocs/vdot/vdot.cpp
--- a/llamalib/src/main/cpp/llama-cpp/poetry.lock
+++ b/llamalib/src/main/cpp/llama-cpp/poetry.lock
--- a/llamalib/src/main/cpp/llama-cpp/prompts/LLM-questions.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/LLM-questions.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/alpaca.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/alpaca.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/assistant.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/assistant.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/chat-with-baichuan.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/chat-with-baichuan.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/chat-with-bob.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/chat-with-bob.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/chat-with-qwen.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/chat-with-qwen.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/chat-with-vicuna-v0.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/chat-with-vicuna-v0.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/chat-with-vicuna-v1.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/chat-with-vicuna-v1.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/chat.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/chat.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/dan-modified.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/dan-modified.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/dan.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/dan.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/mnemonics.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/mnemonics.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/parallel-questions.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/parallel-questions.txt
--- a/llamalib/src/main/cpp/llama-cpp/prompts/reason-act.txt
+++ b/llamalib/src/main/cpp/llama-cpp/prompts/reason-act.txt
--- a/llamalib/src/main/cpp/llama-cpp/pyproject.toml
+++ b/llamalib/src/main/cpp/llama-cpp/pyproject.toml
--- a/llamalib/src/main/cpp/llama-cpp/pyrightconfig.json
+++ b/llamalib/src/main/cpp/llama-cpp/pyrightconfig.json
--- a/llamalib/src/main/cpp/llama-cpp/requirements.txt
+++ b/llamalib/src/main/cpp/llama-cpp/requirements.txt
--- a/llamalib/src/main/cpp/llama-cpp/requirements/requirements-all.txt
+++ b/llamalib/src/main/cpp/llama-cpp/requirements/requirements-all.txt
--- a/llamalib/src/main/cpp/llama-cpp/requirements/requirements-compare-llama-bench.txt
+++ b/llamalib/src/main/cpp/llama-cpp/requirements/requirements-compare-llama-bench.txt
--- a/llamalib/src/main/cpp/llama-cpp/requirements/requirements-convert_hf_to_gguf.txt
+++ b/llamalib/src/main/cpp/llama-cpp/requirements/requirements-convert_hf_to_gguf.txt
--- a/llamalib/src/main/cpp/llama-cpp/requirements/requirements-convert_hf_to_gguf_update.txt
+++ b/llamalib/src/main/cpp/llama-cpp/requirements/requirements-convert_hf_to_gguf_update.txt
--- a/llamalib/src/main/cpp/llama-cpp/requirements/requirements-convert_legacy_llama.txt
+++ b/llamalib/src/main/cpp/llama-cpp/requirements/requirements-convert_legacy_llama.txt
--- a/llamalib/src/main/cpp/llama-cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt
+++ b/llamalib/src/main/cpp/llama-cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt
--- a/llamalib/src/main/cpp/llama-cpp/requirements/requirements-convert_lora_to_gguf.txt
+++ b/llamalib/src/main/cpp/llama-cpp/requirements/requirements-convert_lora_to_gguf.txt
--- a/llamalib/src/main/cpp/llama-cpp/requirements/requirements-pydantic.txt
+++ b/llamalib/src/main/cpp/llama-cpp/requirements/requirements-pydantic.txt
--- a/llamalib/src/main/cpp/llama-cpp/requirements/requirements-test-tokenizer-random.txt
+++ b/llamalib/src/main/cpp/llama-cpp/requirements/requirements-test-tokenizer-random.txt
--- a/llamalib/src/main/cpp/llama-cpp/requirements/requirements-tool_bench.txt
+++ b/llamalib/src/main/cpp/llama-cpp/requirements/requirements-tool_bench.txt
--- a/llamalib/src/main/cpp/llama-cpp/scripts/apple/validate-apps.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/apple/validate-apps.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/apple/validate-ios.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/apple/validate-ios.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/apple/validate-macos.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/apple/validate-macos.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/apple/validate-tvos.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/apple/validate-tvos.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/apple/validate-visionos.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/apple/validate-visionos.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/build-info.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/build-info.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/check-requirements.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/check-requirements.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/ci-run.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/ci-run.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/compare-commits.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/compare-commits.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/compare-llama-bench.py
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/compare-llama-bench.py
--- a/llamalib/src/main/cpp/llama-cpp/scripts/debug-test.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/debug-test.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/fetch_server_test_models.py
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/fetch_server_test_models.py
--- a/llamalib/src/main/cpp/llama-cpp/scripts/gen-authors.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/gen-authors.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/gen-unicode-data.py
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/gen-unicode-data.py
--- a/llamalib/src/main/cpp/llama-cpp/scripts/get-flags.mk
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/get-flags.mk
--- a/llamalib/src/main/cpp/llama-cpp/scripts/get-hellaswag.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/get-hellaswag.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/get-pg.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/get-pg.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/get-wikitext-103.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/get-wikitext-103.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/get-wikitext-2.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/get-wikitext-2.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/get-winogrande.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/get-winogrande.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/get_chat_template.py
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/get_chat_template.py
--- a/llamalib/src/main/cpp/llama-cpp/scripts/hf.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/hf.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/install-oneapi.bat
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/install-oneapi.bat
--- a/llamalib/src/main/cpp/llama-cpp/scripts/qnt-all.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/qnt-all.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/run-all-perf.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/run-all-perf.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/run-all-ppl.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/run-all-ppl.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/sync-ggml-am.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/sync-ggml-am.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/sync-ggml.last
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/sync-ggml.last
--- a/llamalib/src/main/cpp/llama-cpp/scripts/sync-ggml.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/sync-ggml.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/tool_bench.py
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/tool_bench.py
--- a/llamalib/src/main/cpp/llama-cpp/scripts/tool_bench.sh
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/tool_bench.sh
--- a/llamalib/src/main/cpp/llama-cpp/scripts/verify-checksum-models.py
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/verify-checksum-models.py
--- a/llamalib/src/main/cpp/llama-cpp/scripts/xxd.cmake
+++ b/llamalib/src/main/cpp/llama-cpp/scripts/xxd.cmake
--- a/llamalib/src/main/cpp/llama-cpp/src/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/src/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-adapter.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-adapter.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-adapter.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-adapter.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-arch.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-arch.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-arch.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-arch.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-batch.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-batch.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-batch.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-batch.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-chat.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-chat.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-chat.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-chat.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-context.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-context.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-context.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-context.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-cparams.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-cparams.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-cparams.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-cparams.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-grammar.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-grammar.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-grammar.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-grammar.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-graph.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-graph.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-graph.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-graph.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-hparams.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-hparams.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-hparams.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-hparams.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-impl.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-impl.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-impl.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-impl.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-io.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-io.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-io.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-io.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-kv-cache.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-kv-cache.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-kv-cache.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-kv-cache.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-memory.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-memory.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-memory.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-memory.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-mmap.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-mmap.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-mmap.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-mmap.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-model-loader.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-model-loader.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-model-loader.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-model-loader.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-model.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-model.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-model.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-model.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-quant.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-quant.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-quant.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-quant.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-sampling.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-sampling.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-sampling.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-sampling.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-vocab.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-vocab.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/llama-vocab.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama-vocab.h
--- a/llamalib/src/main/cpp/llama-cpp/src/llama.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/llama.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/unicode-data.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/unicode-data.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/unicode-data.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/unicode-data.h
--- a/llamalib/src/main/cpp/llama-cpp/src/unicode.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/src/unicode.cpp
--- a/llamalib/src/main/cpp/llama-cpp/src/unicode.h
+++ b/llamalib/src/main/cpp/llama-cpp/src/unicode.h
--- a/llamalib/src/main/cpp/llama-cpp/tests/.gitignore
+++ b/llamalib/src/main/cpp/llama-cpp/tests/.gitignore
--- a/llamalib/src/main/cpp/llama-cpp/tests/CMakeLists.txt
+++ b/llamalib/src/main/cpp/llama-cpp/tests/CMakeLists.txt
--- a/llamalib/src/main/cpp/llama-cpp/tests/get-model.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/get-model.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/get-model.h
+++ b/llamalib/src/main/cpp/llama-cpp/tests/get-model.h
--- a/llamalib/src/main/cpp/llama-cpp/tests/run-json-schema-to-grammar.mjs
+++ b/llamalib/src/main/cpp/llama-cpp/tests/run-json-schema-to-grammar.mjs
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-arg-parser.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-arg-parser.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-autorelease.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-autorelease.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-backend-ops.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-backend-ops.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-barrier.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-barrier.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-c.c
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-c.c
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-chat-template.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-chat-template.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-chat.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-chat.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-double-float.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-double-float.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-gguf.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-gguf.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-grammar-integration.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-grammar-integration.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-grammar-llguidance.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-grammar-llguidance.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-grammar-parser.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-grammar-parser.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-json-schema-to-grammar.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-json-schema-to-grammar.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-llama-grammar.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-llama-grammar.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-log.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-log.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-lora-conversion-inference.sh
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-lora-conversion-inference.sh
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-model-load-cancel.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-model-load-cancel.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-opt.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-opt.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-quantize-fns.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-quantize-fns.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-quantize-perf.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-quantize-perf.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-rope.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-rope.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-sampling.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-sampling.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-tokenizer-0.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-tokenizer-0.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-tokenizer-0.py
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-tokenizer-0.py
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-tokenizer-0.sh
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-tokenizer-0.sh
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-tokenizer-1-bpe.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-tokenizer-1-bpe.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-tokenizer-1-spm.cpp
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-tokenizer-1-spm.cpp
--- a/llamalib/src/main/cpp/llama-cpp/tests/test-tokenizer-random.py
+++ b/llamalib/src/main/cpp/llama-cpp/tests/test-tokenizer-random.py
--- a/llamalib/src/main/java/com/coolook/llamalib/LLamaAndroid.java
+++ b/llamalib/src/main/java/com/coolook/llamalib/LLamaAndroid.java
--- a/llamalib/src/test/java/com/coolook/llamalib/ExampleUnitTest.java
+++ b/llamalib/src/test/java/com/coolook/llamalib/ExampleUnitTest.java
--- a/settings.gradle
+++ b/settings.gradle