From 90f11edf0b233b4d0190cc47c64c26dcb6b6b30c Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 18:42:47 +0200 Subject: [PATCH 01/13] feat: add Vulkan support --- .github/ISSUE_TEMPLATE/bug-report.yml | 4 ++ README.md | 2 +- docs/index.md | 2 +- llama/CMakeLists.txt | 16 ++++++ llama/addon.cpp | 14 ++++++ llama/gpuInfo/vulkan-gpu-info.cpp | 50 +++++++++++++++++++ llama/gpuInfo/vulkan-gpu-info.h | 5 ++ package.json | 1 + src/bindings/Llama.ts | 10 +++- src/bindings/getLlama.ts | 15 +++++- src/bindings/types.ts | 3 +- src/bindings/utils/compileLLamaCpp.ts | 3 ++ .../getBuildFolderNameForBuildOptions.ts | 3 ++ .../utils/resolveCustomCmakeOptions.ts | 1 + src/cli/commands/BuildCommand.ts | 22 ++++---- src/cli/commands/DebugCommand.ts | 17 +++---- src/cli/commands/DownloadCommand.ts | 23 +++++---- src/cli/utils/logEnabledComputeLayers.ts | 21 ++++++++ src/config.ts | 3 ++ src/utils/getBuildDefaults.ts | 5 +- 20 files changed, 183 insertions(+), 37 deletions(-) create mode 100644 llama/gpuInfo/vulkan-gpu-info.cpp create mode 100644 llama/gpuInfo/vulkan-gpu-info.h create mode 100644 src/cli/utils/logEnabledComputeLayers.ts diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 3a7d80cc..cf75e5e3 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -76,8 +76,12 @@ body: required: false - label: CUDA support required: false + - label: Vulkan support + required: false - label: Grammar required: false + - label: Function calling + required: false - type: dropdown id: pr attributes: diff --git a/README.md b/README.md index b0a54ef9..d4e7bbaf 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ ## Features * Run a text generation model locally on your machine -* Metal and CUDA support +* Metal, CUDA and Vulkan support * Pre-built binaries are provided, with a fallback to building from source without `node-gyp` or Python * Chat with a model using a chat wrapper * Use the CLI to chat with a model without writing any code diff --git a/docs/index.md b/docs/index.md index 4cf9ba07..40484332 100644 --- a/docs/index.md +++ b/docs/index.md @@ -20,7 +20,7 @@ hero: features: - icon: 🚀 - title: Metal and CUDA support + title: Metal, CUDA and Vulkan support details: Utilize the power of your GPU to run AI models faster link: /guide/#cuda-and-metal-support linkText: Learn more diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt index a92404be..5dc5e66a 100644 --- a/llama/CMakeLists.txt +++ b/llama/CMakeLists.txt @@ -58,6 +58,22 @@ if (LLAMA_CUBLAS) endif() endif() +if (LLAMA_VULKAN) + find_package(Vulkan) + if (Vulkan_FOUND) + message(STATUS "Using Vulkan for GPU info") + + set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/vulkan-gpu-info.h) + set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/vulkan-gpu-info.cpp) + + add_compile_definitions(GPU_INFO_USE_VULKAN) + + set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} Vulkan::Vulkan) + else() + message(WARNING "Vulkan not found. 
Not using it for GPU info") + endif() +endif() + if (LLAMA_HIPBLAS) list(APPEND CMAKE_PREFIX_PATH /opt/rocm) diff --git a/llama/addon.cpp b/llama/addon.cpp index acb44ea1..1991a03b 100644 --- a/llama/addon.cpp +++ b/llama/addon.cpp @@ -12,6 +12,9 @@ #ifdef GPU_INFO_USE_CUBLAS # include "gpuInfo/cuda-gpu-info.h" #endif +#ifdef GPU_INFO_USE_VULKAN +# include "gpuInfo/vulkan-gpu-info.h" +#endif #ifdef GPU_INFO_USE_METAL # include "gpuInfo/metal-gpu-info.h" #endif @@ -71,6 +74,17 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { } #endif +#ifdef GPU_INFO_USE_VULKAN + uint64_t vulkanDeviceTotal = 0; + uint64_t vulkanDeviceUsed = 0; + const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed); + + if (vulkanDeviceSupportsMemoryBudgetExtension) { + total += vulkanDeviceTotal; + used += vulkanDeviceUsed; + } +#endif + #ifdef GPU_INFO_USE_METAL uint64_t metalDeviceTotal = 0; uint64_t metalDeviceUsed = 0; diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp new file mode 100644 index 00000000..4f4091a9 --- /dev/null +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -0,0 +1,50 @@ +#include +#include +// #include + +bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { + vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION); + vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {}); + vk::Instance instance = vk::createInstance(createInfo); + + auto physicalDevices = instance.enumeratePhysicalDevices(); + + size_t usedMem = 0; + size_t totalMem = 0; + + for (size_t i = 0; i < physicalDevices.size(); i++) { + vk::PhysicalDevice physicalDevice = physicalDevices[i]; + vk::PhysicalDeviceMemoryProperties memProps = physicalDevice.getMemoryProperties(); + + std::vector extensionProperties = physicalDevice.enumerateDeviceExtensionProperties(); + bool memoryBudgetExtensionSupported = std::any_of( + extensionProperties.begin(), + extensionProperties.end(), + [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME; } + ); + + if (memoryBudgetExtensionSupported) { + vk::PhysicalDeviceMemoryBudgetPropertiesEXT memoryBudgetProperties; + vk::PhysicalDeviceMemoryProperties2 memProps2 = {}; + memProps2.pNext = &memoryBudgetProperties; + + physicalDevice.getMemoryProperties2(&memProps2); + + for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) { + if (memProps.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) { + totalMem += memProps.memoryHeaps[i].size; + usedMem += memoryBudgetProperties.heapUsage[i]; + break; + } + } + } else { + // VK_EXT_memory_budget extension is not supported, se we cannot determine used memory + std::cerr << "VK_EXT_memory_budget extension not supported" << std::endl; + return false; + } + } + + *total = totalMem; + *used = usedMem; + return true; +} diff --git a/llama/gpuInfo/vulkan-gpu-info.h b/llama/gpuInfo/vulkan-gpu-info.h new file mode 100644 index 00000000..ccd1dfe3 --- /dev/null +++ b/llama/gpuInfo/vulkan-gpu-info.h @@ -0,0 +1,5 @@ +#pragma once + +#include + +bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used); diff --git a/package.json b/package.json index 4cd8de6a..b966142b 100644 --- a/package.json +++ b/package.json @@ -82,6 +82,7 @@ "gguf", "metal", "cuda", + "vulkan", "grammar", "json-grammar", "json-schema-grammar", diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts index d4fd2d71..7f2e6c26 100644 --- 
a/src/bindings/Llama.ts +++ b/src/bindings/Llama.ts @@ -20,6 +20,7 @@ export class Llama { /** @internal */ public readonly _bindings: BindingModule; /** @internal */ private readonly _metal: boolean; /** @internal */ private readonly _cuda: boolean; + /** @internal */ private readonly _vulkan: boolean; /** @internal */ private readonly _buildType: "localBuild" | "prebuilt"; /** @internal */ private readonly _cmakeOptions: Readonly>; /** @internal */ private readonly _llamaCppRelease: { @@ -36,11 +37,12 @@ export class Llama { /** @internal */ private _nextLogNeedNewLine: boolean = false; private constructor({ - bindings, metal, cuda, logLevel, logger, buildType, cmakeOptions, llamaCppRelease + bindings, metal, cuda, vulkan, logLevel, logger, buildType, cmakeOptions, llamaCppRelease }: { bindings: BindingModule metal: boolean, cuda: boolean, + vulkan: boolean, logLevel: LlamaLogLevel, logger: (level: LlamaLogLevel, message: string) => void, buildType: "localBuild" | "prebuilt", @@ -53,6 +55,7 @@ export class Llama { this._bindings = bindings; this._metal = metal; this._cuda = cuda; + this._vulkan = vulkan; this._logLevel = logLevel ?? LlamaLogLevel.debug; this._logger = logger; this._buildType = buildType; @@ -77,6 +80,10 @@ export class Llama { return this._cuda; } + public get vulkan() { + return this._vulkan; + } + public get logLevel() { return this._logLevel; } @@ -213,6 +220,7 @@ export class Llama { buildType, metal: buildMetadata.buildOptions.computeLayers.metal, cuda: buildMetadata.buildOptions.computeLayers.cuda, + vulkan: buildMetadata.buildOptions.computeLayers.vulkan, cmakeOptions: buildMetadata.buildOptions.customCmakeOptions, llamaCppRelease: { repo: buildMetadata.buildOptions.llamaCpp.repo, diff --git a/src/bindings/getLlama.ts b/src/bindings/getLlama.ts index e7816b5f..11685fd5 100644 --- a/src/bindings/getLlama.ts +++ b/src/bindings/getLlama.ts @@ -4,7 +4,7 @@ import console from "console"; import {createRequire} from "module"; import { builtinLlamaCppGitHubRepo, builtinLlamaCppRelease, defaultLlamaCppCudaSupport, defaultLlamaCppDebugLogs, defaultLlamaCppGitHubRepo, - defaultLlamaCppMetalSupport, defaultLlamaCppRelease, defaultSkipDownload, llamaLocalBuildBinsDirectory + defaultLlamaCppMetalSupport, defaultLlamaCppRelease, defaultLlamaCppVulkanSupport, defaultSkipDownload, llamaLocalBuildBinsDirectory } from "../config.js"; import {getConsoleLogPrefix} from "../utils/getConsoleLogPrefix.js"; import {waitForLockfileRelease} from "../utils/waitForLockfileRelease.js"; @@ -39,6 +39,12 @@ export type LlamaOptions = { */ cuda?: boolean, + /** + * Toggle Vulkan support on llama.cpp. + * Disabled by default. + */ + vulkan?: boolean, + /** * Set the minimum log level for llama.cpp. * Defaults to "debug". @@ -184,6 +190,7 @@ export async function getLlama(options?: LlamaOptions | "lastBuild", lastBuildOp export async function getLlamaForOptions({ metal = defaultLlamaCppMetalSupport, cuda = defaultLlamaCppCudaSupport, + vulkan = defaultLlamaCppVulkanSupport, logLevel = defaultLlamaCppDebugLogs, logger = Llama.defaultConsoleLogger, build = "auto", @@ -220,7 +227,8 @@ export async function getLlamaForOptions({ arch, computeLayers: { metal, - cuda + cuda, + vulkan }, llamaCpp: { repo: clonedLlamaCppRepoReleaseInfo?.llamaCppGithubRepo ?? 
builtinLlamaCppGitHubRepo, @@ -356,6 +364,9 @@ function describeBinary(binaryOptions: BuildOptions) { if (binaryOptions.computeLayers.cuda) additions.push("with CUDA support"); + if (binaryOptions.computeLayers.vulkan) + additions.push("with Vulkan support"); + if (binaryOptions.customCmakeOptions.size > 0) additions.push("with custom build options"); diff --git a/src/bindings/types.ts b/src/bindings/types.ts index c5beadcb..a5a89baa 100644 --- a/src/bindings/types.ts +++ b/src/bindings/types.ts @@ -8,7 +8,8 @@ export type BuildOptions = { arch: typeof process.arch, computeLayers: { metal: boolean, - cuda: boolean + cuda: boolean, + vulkan: boolean }, llamaCpp: { repo: string, diff --git a/src/bindings/utils/compileLLamaCpp.ts b/src/bindings/utils/compileLLamaCpp.ts index 24eace3a..d43767f3 100644 --- a/src/bindings/utils/compileLLamaCpp.ts +++ b/src/bindings/utils/compileLLamaCpp.ts @@ -64,6 +64,9 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, { if (buildOptions.computeLayers.cuda && !cmakeCustomOptions.has("LLAMA_CUBLAS")) cmakeCustomOptions.set("LLAMA_CUBLAS", "1"); + if (buildOptions.computeLayers.vulkan && !cmakeCustomOptions.has("LLAMA_VULKAN")) + cmakeCustomOptions.set("LLAMA_VULKAN", "1"); + if (toolchainFile != null && !cmakeCustomOptions.has("CMAKE_TOOLCHAIN_FILE")) cmakeCustomOptions.set("CMAKE_TOOLCHAIN_FILE", toolchainFile); diff --git a/src/bindings/utils/getBuildFolderNameForBuildOptions.ts b/src/bindings/utils/getBuildFolderNameForBuildOptions.ts index 36bb850f..64ea524d 100644 --- a/src/bindings/utils/getBuildFolderNameForBuildOptions.ts +++ b/src/bindings/utils/getBuildFolderNameForBuildOptions.ts @@ -10,6 +10,9 @@ export async function getBuildFolderNameForBuildOptions(buildOptions: BuildOptio else if (buildOptions.computeLayers.cuda) nameParts.push("cuda"); + if (buildOptions.computeLayers.vulkan) + nameParts.push("vulkan"); + if (buildOptions.llamaCpp.repo !== builtinLlamaCppGitHubRepo || buildOptions.llamaCpp.release !== builtinLlamaCppRelease) nameParts.push("release-" + await getFolderNamePartForRelease(buildOptions.llamaCpp.repo, buildOptions.llamaCpp.release)); diff --git a/src/bindings/utils/resolveCustomCmakeOptions.ts b/src/bindings/utils/resolveCustomCmakeOptions.ts index 1d269bd5..f9a11006 100644 --- a/src/bindings/utils/resolveCustomCmakeOptions.ts +++ b/src/bindings/utils/resolveCustomCmakeOptions.ts @@ -8,6 +8,7 @@ export function resolveCustomCmakeOptions(customCmakeOptions?: Record = { default: defaultLlamaCppCudaSupport, description: "Compile llama.cpp with CUDA support. Can also be set via the NODE_LLAMA_CPP_CUDA environment variable" }) + .option("vulkan", { + type: "boolean", + default: defaultLlamaCppVulkanSupport, + description: "Compile llama.cpp with Vulkan support. 
Can also be set via the NODE_LLAMA_CPP_VULKAN environment variable" + }) .option("noUsageExample", { alias: "nu", type: "boolean", @@ -75,6 +83,7 @@ export async function BuildLlamaCppCommand({ nodeTarget = undefined, metal = defaultLlamaCppMetalSupport, cuda = defaultLlamaCppCudaSupport, + vulkan = defaultLlamaCppVulkanSupport, noUsageExample = false, noCustomCmakeBuildOptionsInBinaryFolderName = false }: BuildCommand) { @@ -90,13 +99,7 @@ export async function BuildLlamaCppCommand({ const platform = getPlatform(); const customCmakeOptions = resolveCustomCmakeOptions(); - if (metal && process.platform === "darwin") { - console.log(`${chalk.yellow("Metal:")} enabled`); - } - - if (cuda) { - console.log(`${chalk.yellow("CUDA:")} enabled`); - } + logEnabledComputeLayers({metal, cuda, vulkan}, {platform}); await downloadCmakeIfNeeded(true); @@ -109,7 +112,8 @@ export async function BuildLlamaCppCommand({ : process.arch, computeLayers: { metal, - cuda + cuda, + vulkan }, llamaCpp: { repo: clonedLlamaCppRepoReleaseInfo?.llamaCppGithubRepo ?? builtinLlamaCppGitHubRepo, diff --git a/src/cli/commands/DebugCommand.ts b/src/cli/commands/DebugCommand.ts index 20a90649..e4c99657 100644 --- a/src/cli/commands/DebugCommand.ts +++ b/src/cli/commands/DebugCommand.ts @@ -5,6 +5,7 @@ import chalk from "chalk"; import {getLlama} from "../../bindings/getLlama.js"; import {Llama} from "../../bindings/Llama.js"; import {prettyPrintObject} from "../../utils/prettyPrintObject.js"; +import {logEnabledComputeLayers} from "../utils/logEnabledComputeLayers.js"; const debugFunctions = ["vram", "cmakeOptions"] as const; type DebugCommand = { @@ -59,17 +60,13 @@ async function DebugCmakeOptionsFunction() { } function logComputeLayers(llama: Llama) { - let hasEnabledLayers = false; + logEnabledComputeLayers({ + metal: llama.metal, + cuda: llama.cuda, + vulkan: llama.vulkan + }); - if (llama.metal) { - console.info(`${chalk.yellow("Metal:")} enabled`); - hasEnabledLayers = true; - } - - if (llama.cuda) { - console.info(`${chalk.yellow("Metal:")} enabled`); - hasEnabledLayers = true; - } + const hasEnabledLayers = llama.metal || llama.cuda || llama.vulkan; if (hasEnabledLayers) console.info(); diff --git a/src/cli/commands/DownloadCommand.ts b/src/cli/commands/DownloadCommand.ts index 7ae6c004..fe654028 100644 --- a/src/cli/commands/DownloadCommand.ts +++ b/src/cli/commands/DownloadCommand.ts @@ -3,8 +3,8 @@ import {CommandModule} from "yargs"; import fs from "fs-extra"; import chalk from "chalk"; import { - defaultLlamaCppCudaSupport, defaultLlamaCppGitHubRepo, defaultLlamaCppMetalSupport, defaultLlamaCppRelease, isCI, llamaCppDirectory, - llamaCppDirectoryInfoFilePath + defaultLlamaCppCudaSupport, defaultLlamaCppVulkanSupport, defaultLlamaCppGitHubRepo, defaultLlamaCppMetalSupport, + defaultLlamaCppRelease, isCI, llamaCppDirectory, llamaCppDirectoryInfoFilePath } from "../../config.js"; import {compileLlamaCpp} from "../../bindings/utils/compileLLamaCpp.js"; import withOra from "../../utils/withOra.js"; @@ -20,6 +20,7 @@ import {resolveCustomCmakeOptions} from "../../bindings/utils/resolveCustomCmake import {logBinaryUsageExampleToConsole} from "../../bindings/utils/logBinaryUsageExampleToConsole.js"; import {resolveGithubRelease} from "../../utils/resolveGithubRelease.js"; import {BuildOptions} from "../../bindings/types.js"; +import {logEnabledComputeLayers} from "../utils/logEnabledComputeLayers.js"; type DownloadCommandArgs = { repo?: string, @@ -28,6 +29,7 @@ type DownloadCommandArgs = { nodeTarget?: string, metal?: 
boolean, cuda?: boolean, + vulkan?: boolean, skipBuild?: boolean, noBundle?: boolean, noUsageExample?: boolean, @@ -74,6 +76,11 @@ export const DownloadCommand: CommandModule = { default: defaultLlamaCppCudaSupport, description: "Compile llama.cpp with CUDA support. Can also be set via the NODE_LLAMA_CPP_CUDA environment variable" }) + .option("vulkan", { + type: "boolean", + default: defaultLlamaCppVulkanSupport, + description: "Compile llama.cpp with Vulkan support. Can also be set via the NODE_LLAMA_CPP_VULKAN environment variable" + }) .option("skipBuild", { alias: "sb", type: "boolean", @@ -110,6 +117,7 @@ export async function DownloadLlamaCppCommand({ nodeTarget = undefined, metal = defaultLlamaCppMetalSupport, cuda = defaultLlamaCppCudaSupport, + vulkan = defaultLlamaCppVulkanSupport, skipBuild = false, noBundle = false, noUsageExample = false, @@ -123,13 +131,7 @@ export async function DownloadLlamaCppCommand({ console.log(`${chalk.yellow("Repo:")} ${repo}`); console.log(`${chalk.yellow("Release:")} ${release}`); if (!skipBuild) { - if (metal && platform === "mac") { - console.log(`${chalk.yellow("Metal:")} enabled`); - } - - if (cuda) { - console.log(`${chalk.yellow("CUDA:")} enabled`); - } + logEnabledComputeLayers({metal, cuda, vulkan}, {platform}); } console.log(); @@ -169,7 +171,8 @@ export async function DownloadLlamaCppCommand({ : process.arch, computeLayers: { metal, - cuda + cuda, + vulkan }, llamaCpp: { repo, diff --git a/src/cli/utils/logEnabledComputeLayers.ts b/src/cli/utils/logEnabledComputeLayers.ts new file mode 100644 index 00000000..5ac55d12 --- /dev/null +++ b/src/cli/utils/logEnabledComputeLayers.ts @@ -0,0 +1,21 @@ +import chalk from "chalk"; +import {BinaryPlatform, getPlatform} from "../../bindings/utils/getPlatform.js"; + +export function logEnabledComputeLayers({ + metal, cuda, vulkan +}: { + metal: boolean, cuda: boolean, vulkan: boolean +}, { + platform = getPlatform() +}: { + platform?: BinaryPlatform +} = {}) { + if (metal && platform === "mac") + console.log(`${chalk.yellow("Metal:")} enabled`); + + if (cuda) + console.log(`${chalk.yellow("CUDA:")} enabled`); + + if (vulkan) + console.log(`${chalk.yellow("Vulkan:")} enabled`); +} diff --git a/src/config.ts b/src/config.ts index 65164a66..267675a5 100644 --- a/src/config.ts +++ b/src/config.ts @@ -51,6 +51,9 @@ export const defaultLlamaCppMetalSupport = env.get("NODE_LLAMA_CPP_METAL") export const defaultLlamaCppCudaSupport = env.get("NODE_LLAMA_CPP_CUDA") .default("false") .asBool(); +export const defaultLlamaCppVulkanSupport = env.get("NODE_LLAMA_CPP_VULKAN") + .default("false") + .asBool(); export const defaultLlamaCppDebugLogs = env.get("NODE_LLAMA_CPP_LOG_LEVEL") .default(LlamaLogLevel.debug) .asEnum(LlamaLogLevelValues); diff --git a/src/utils/getBuildDefaults.ts b/src/utils/getBuildDefaults.ts index eb18ce49..73bc64e1 100644 --- a/src/utils/getBuildDefaults.ts +++ b/src/utils/getBuildDefaults.ts @@ -1,5 +1,5 @@ import { - defaultLlamaCppCudaSupport, defaultLlamaCppGitHubRepo, defaultLlamaCppMetalSupport, defaultLlamaCppRelease + defaultLlamaCppCudaSupport, defaultLlamaCppGitHubRepo, defaultLlamaCppMetalSupport, defaultLlamaCppRelease, defaultLlamaCppVulkanSupport } from "../config.js"; export async function getBuildDefaults() { @@ -7,6 +7,7 @@ export async function getBuildDefaults() { repo: defaultLlamaCppGitHubRepo, release: defaultLlamaCppRelease, metalSupport: defaultLlamaCppMetalSupport, - cudaSupport: defaultLlamaCppCudaSupport + cudaSupport: defaultLlamaCppCudaSupport, + vulkanSupport: 
defaultLlamaCppVulkanSupport }; } From cd2b09e8e0a2f9ef10bf0c7806db408bb0c60d43 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 19:04:00 +0200 Subject: [PATCH 02/13] fix: Vulkan GPU info build --- llama/gpuInfo/vulkan-gpu-info.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 4f4091a9..119794fa 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -1,6 +1,5 @@ #include -#include -// #include +#include bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION); From eed12dd1f7614fa7689e955c740b7f883204e248 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 19:12:59 +0200 Subject: [PATCH 03/13] fix: Vulkan GPU info build --- llama/gpuInfo/vulkan-gpu-info.cpp | 17 +++++++++-------- src/cli/commands/DebugCommand.ts | 15 +++++++++++---- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 119794fa..6b836a87 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -1,11 +1,12 @@ #include + #include bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { - vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION); + vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2); vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {}); vk::Instance instance = vk::createInstance(createInfo); - + auto physicalDevices = instance.enumeratePhysicalDevices(); size_t usedMem = 0; @@ -16,11 +17,10 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { vk::PhysicalDeviceMemoryProperties memProps = physicalDevice.getMemoryProperties(); std::vector extensionProperties = physicalDevice.enumerateDeviceExtensionProperties(); - bool memoryBudgetExtensionSupported = std::any_of( - extensionProperties.begin(), - extensionProperties.end(), - [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME; } - ); + bool memoryBudgetExtensionSupported = + std::any_of(extensionProperties.begin(), extensionProperties.end(), [](const vk::ExtensionProperties& ext) { + return std::string(ext.extensionName) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME; + }); if (memoryBudgetExtensionSupported) { vk::PhysicalDeviceMemoryBudgetPropertiesEXT memoryBudgetProperties; @@ -38,7 +38,8 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { } } else { // VK_EXT_memory_budget extension is not supported, se we cannot determine used memory - std::cerr << "VK_EXT_memory_budget extension not supported" << std::endl; + fputs("VK_EXT_memory_budget extension not supported", stderr); + fflush(stderr); return false; } } diff --git a/src/cli/commands/DebugCommand.ts b/src/cli/commands/DebugCommand.ts index e4c99657..669e6d55 100644 --- a/src/cli/commands/DebugCommand.ts +++ b/src/cli/commands/DebugCommand.ts @@ -44,11 +44,18 @@ async function DebugVramFunction() { logComputeLayers(llama); - console.info(`${chalk.yellow("Used VRAM:")} ${Math.ceil((vramStatus.used / vramStatus.total) * 100 * 100) / 100}% ${chalk.grey("(" + bytes(vramStatus.used) + "/" + bytes(vramStatus.total) + ")")}`); - console.info(`${chalk.yellow("Free VRAM:")} ${Math.floor((vramStatus.free / vramStatus.total) * 100 * 100) / 
100}% ${chalk.grey("(" + bytes(vramStatus.free) + "/" + bytes(vramStatus.total) + ")")}`); + const getPercentageString = (amount: number, total: number) => { + if (total === 0) + return "0"; + + return String(Math.floor((amount / total) * 100 * 100) / 100); + }; + + console.info(`${chalk.yellow("Used VRAM:")} ${getPercentageString(vramStatus.used, vramStatus.total)}% ${chalk.grey("(" + bytes(vramStatus.used) + "/" + bytes(vramStatus.total) + ")")}`); + console.info(`${chalk.yellow("Free VRAM:")} ${getPercentageString(vramStatus.free, vramStatus.total)}% ${chalk.grey("(" + bytes(vramStatus.free) + "/" + bytes(vramStatus.total) + ")")}`); console.info(); - console.info(`${chalk.yellow("Used RAM:")} ${Math.ceil((usedMemory / totalMemory) * 100 * 100) / 100}% ${chalk.grey("(" + bytes(usedMemory) + "/" + bytes(totalMemory) + ")")}`); - console.info(`${chalk.yellow("Free RAM:")} ${Math.floor((freeMemory / totalMemory) * 100 * 100) / 100}% ${chalk.grey("(" + bytes(freeMemory) + "/" + bytes(totalMemory) + ")")}`); + console.info(`${chalk.yellow("Used RAM:")} ${getPercentageString(usedMemory, totalMemory)}% ${chalk.grey("(" + bytes(usedMemory) + "/" + bytes(totalMemory) + ")")}`); + console.info(`${chalk.yellow("Free RAM:")} ${getPercentageString(freeMemory, totalMemory)}% ${chalk.grey("(" + bytes(freeMemory) + "/" + bytes(totalMemory) + ")")}`); } async function DebugCmakeOptionsFunction() { From 33c1be64915a8670b678c2564356eb6bf2f9809e Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 19:32:22 +0200 Subject: [PATCH 04/13] fix: Vulkan GPU info build --- llama/gpuInfo/vulkan-gpu-info.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 6b836a87..b561a093 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -15,6 +15,12 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { for (size_t i = 0; i < physicalDevices.size(); i++) { vk::PhysicalDevice physicalDevice = physicalDevices[i]; vk::PhysicalDeviceMemoryProperties memProps = physicalDevice.getMemoryProperties(); + vk::PhysicalDeviceProperties deviceProps = physicalDevice.getProperties(); + + if (deviceProps.deviceType == vk::PhysicalDeviceType::eCpu) { + // ignore CPU devices, as we don't want to count RAM from the CPU as VRAM + continue; + } std::vector extensionProperties = physicalDevice.enumerateDeviceExtensionProperties(); bool memoryBudgetExtensionSupported = @@ -37,7 +43,7 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { } } } else { - // VK_EXT_memory_budget extension is not supported, se we cannot determine used memory + // VK_EXT_memory_budget extension is not supported, so we cannot determine used memory fputs("VK_EXT_memory_budget extension not supported", stderr); fflush(stderr); return false; From 319fedeaaf7b9b54877f90612134b75cafbf33a7 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 19:47:53 +0200 Subject: [PATCH 05/13] feat: improve Vulkan warning log --- llama/addon.cpp | 11 ++++++++--- llama/gpuInfo/cuda-gpu-info.cu | 10 +++++----- llama/gpuInfo/cuda-gpu-info.h | 4 ++-- llama/gpuInfo/vulkan-gpu-info.cpp | 21 +++++++++++++++------ llama/gpuInfo/vulkan-gpu-info.h | 4 +++- 5 files changed, 33 insertions(+), 17 deletions(-) diff --git a/llama/addon.cpp b/llama/addon.cpp index 1991a03b..aa2c0670 100644 --- a/llama/addon.cpp +++ b/llama/addon.cpp @@ -54,10 +54,15 @@ std::string addon_model_token_to_piece(const struct llama_model* 
model, llama_to } #ifdef GPU_INFO_USE_CUBLAS -void lodCudaError(const char* message) { +void logCudaError(const char* message) { addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr); } #endif +#ifdef GPU_INFO_USE_VULKAN +void logVulkanWarning(const char* message) { + addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr); +} +#endif Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { uint64_t total = 0; @@ -66,7 +71,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { #ifdef GPU_INFO_USE_CUBLAS size_t cudaDeviceTotal = 0; size_t cudaDeviceUsed = 0; - bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, lodCudaError); + bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError); if (cudeGetInfoSuccess) { total += cudaDeviceTotal; @@ -77,7 +82,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { #ifdef GPU_INFO_USE_VULKAN uint64_t vulkanDeviceTotal = 0; uint64_t vulkanDeviceUsed = 0; - const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed); + const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning); if (vulkanDeviceSupportsMemoryBudgetExtension) { total += vulkanDeviceTotal; diff --git a/llama/gpuInfo/cuda-gpu-info.cu b/llama/gpuInfo/cuda-gpu-info.cu index c0565ad6..62a9bd89 100644 --- a/llama/gpuInfo/cuda-gpu-info.cu +++ b/llama/gpuInfo/cuda-gpu-info.cu @@ -15,9 +15,9 @@ #endif -typedef void (*gpuInfoErrorLogCallback_t)(const char* message); +typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message); -bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) { +bool gpuInfoSetCudaDevice(const int device, gpuInfoCudaErrorLogCallback_t errorLogCallback) { int current_device; auto getDeviceResult = cudaGetDevice(¤t_device); @@ -40,7 +40,7 @@ bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCa return true; } -bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) { +bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) { gpuInfoSetCudaDevice(device, errorLogCallback); size_t freeMem; @@ -58,7 +58,7 @@ bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfo return true; } -int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) { +int gpuInfoGetCudaDeviceCount(gpuInfoCudaErrorLogCallback_t errorLogCallback) { int deviceCount; auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount); @@ -70,7 +70,7 @@ int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) { return deviceCount; } -bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) { +bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) { int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback); if (deviceCount < 0) { diff --git a/llama/gpuInfo/cuda-gpu-info.h b/llama/gpuInfo/cuda-gpu-info.h index 25dcd0df..dfd0bbdd 100644 --- a/llama/gpuInfo/cuda-gpu-info.h +++ b/llama/gpuInfo/cuda-gpu-info.h @@ -2,6 +2,6 @@ #include -typedef void 
(*gpuInfoErrorLogCallback_t)(const char* message); +typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message); -bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback); +bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback); diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index b561a093..554bad44 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -2,7 +2,9 @@ #include -bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { +typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message); + +bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback) { vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2); vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {}); vk::Instance instance = vk::createInstance(createInfo); @@ -24,9 +26,11 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { std::vector extensionProperties = physicalDevice.enumerateDeviceExtensionProperties(); bool memoryBudgetExtensionSupported = - std::any_of(extensionProperties.begin(), extensionProperties.end(), [](const vk::ExtensionProperties& ext) { - return std::string(ext.extensionName) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME; - }); + std::any_of( + extensionProperties.begin(), + extensionProperties.end(), + [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME;} + ); if (memoryBudgetExtensionSupported) { vk::PhysicalDeviceMemoryBudgetPropertiesEXT memoryBudgetProperties; @@ -44,8 +48,13 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { } } else { // VK_EXT_memory_budget extension is not supported, so we cannot determine used memory - fputs("VK_EXT_memory_budget extension not supported", stderr); - fflush(stderr); + warningLogCallback( + ( + "Vulkan VK_EXT_memory_budget extension not supported for device \"" + + std::string(deviceProps.deviceName.data()) + "\", so VRAM info cannot be determained for it" + ) + .c_str() + ); return false; } } diff --git a/llama/gpuInfo/vulkan-gpu-info.h b/llama/gpuInfo/vulkan-gpu-info.h index ccd1dfe3..6a2fbe40 100644 --- a/llama/gpuInfo/vulkan-gpu-info.h +++ b/llama/gpuInfo/vulkan-gpu-info.h @@ -2,4 +2,6 @@ #include -bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used); +typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message); + +bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback); From 4f5d901fc352d5214a30380fcdd670194cdc9ceb Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 21:19:07 +0200 Subject: [PATCH 06/13] fix: free llama backend when garbage collected --- llama/addon.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/llama/addon.cpp b/llama/addon.cpp index aa2c0670..91a716de 100644 --- a/llama/addon.cpp +++ b/llama/addon.cpp @@ -38,6 +38,7 @@ using AddonThreadSafeLogCallbackFunction = AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback; bool addonJsLoggerCallbackSet = false; int addonLoggerLogLevel = 5; +bool backendInitialized = false; std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) { std::vector result(8, 0); @@ -969,7 +970,7 @@ void 
addonCallJsLogCallback( called = false; } } - + if (!called && data != nullptr) { if (data->logLevelNumber == 2) { fputs(data->stringStream->str().c_str(), stderr); @@ -1065,8 +1066,17 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) { return info.Env().Undefined(); } +static void addonFreeLlamaBackend(Napi::Env env, int* data) { + if (backendInitialized) { + llama_backend_free(); + backendInitialized = false; + } +} + Napi::Object registerCallback(Napi::Env env, Napi::Object exports) { llama_backend_init(); + backendInitialized = true; + exports.DefineProperties({ Napi::PropertyDescriptor::Function("systemInfo", systemInfo), Napi::PropertyDescriptor::Function("setLogger", setLogger), @@ -1080,6 +1090,8 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) { llama_log_set(addonLlamaCppLogCallback, nullptr); + exports.AddFinalizer(addonFreeLlamaBackend, static_cast(nullptr)); + return exports; } From ffaaaa38a136ab82e20c6badc26819198360eb83 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 21:36:27 +0200 Subject: [PATCH 07/13] fix: default system prompt --- docs/guide/chat-prompt-wrapper.md | 4 ++-- src/config.ts | 4 ++-- .../chatWrappers/ChatMLChatPromptWrapper.test.ts | 8 ++++---- .../chatWrappers/FalconChatPromptWrapper.test.ts | 8 ++++---- .../chatWrappers/GeneralChatPromptWrapper.test.ts | 12 ++++++------ .../chatWrappers/LlamaChatPromptWrapper.test.ts | 8 ++++---- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/docs/guide/chat-prompt-wrapper.md b/docs/guide/chat-prompt-wrapper.md index 3bb3e69e..bee16add 100644 --- a/docs/guide/chat-prompt-wrapper.md +++ b/docs/guide/chat-prompt-wrapper.md @@ -7,8 +7,8 @@ and parse its response to know whether it finished answering, or should we tell For example, to prompt a model with "Where do llamas come from?" we can give the model a text like this to predict the completion of: ```txt You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. -If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. -If you don't know the answer to a question, please don't share false information. +If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. +If you don't know the answer to a question, don't share false information. ### Human Where do llamas come from? diff --git a/src/config.ts b/src/config.ts index 267675a5..43430bee 100644 --- a/src/config.ts +++ b/src/config.ts @@ -68,8 +68,8 @@ export const defaultXpacksCacheDirectory = env.get("NODE_LLAMA_CPP_XPACKS_CACHE_ .asString(); export const customCmakeOptionsEnvVarPrefix = "NODE_LLAMA_CPP_CMAKE_OPTION_"; export const defaultChatSystemPrompt = "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible.\n" + - "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. " + - "If you don't know the answer to a question, please don't share false information."; + "If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
" + + "If you don't know the answer to a question, don't share false information."; export const cliBinName = "node-llama-cpp"; export const npxRunPrefix = "npx --no "; diff --git a/test/standalone/chatWrappers/ChatMLChatPromptWrapper.test.ts b/test/standalone/chatWrappers/ChatMLChatPromptWrapper.test.ts index ce2a8061..8e5ac264 100644 --- a/test/standalone/chatWrappers/ChatMLChatPromptWrapper.test.ts +++ b/test/standalone/chatWrappers/ChatMLChatPromptWrapper.test.ts @@ -43,7 +43,7 @@ describe("ChatMLChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": "<|im_end|> @@ -79,7 +79,7 @@ describe("ChatMLChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": "<|im_end|> @@ -143,7 +143,7 @@ describe("ChatMLChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": "<|im_end|> @@ -176,7 +176,7 @@ describe("ChatMLChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": "<|im_end|> diff --git a/test/standalone/chatWrappers/FalconChatPromptWrapper.test.ts b/test/standalone/chatWrappers/FalconChatPromptWrapper.test.ts index c002ec22..3aaee42d 100644 --- a/test/standalone/chatWrappers/FalconChatPromptWrapper.test.ts +++ b/test/standalone/chatWrappers/FalconChatPromptWrapper.test.ts @@ -43,7 +43,7 @@ describe("FalconChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -68,7 +68,7 @@ describe("FalconChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -110,7 +110,7 @@ describe("FalconChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -132,7 +132,7 @@ describe("FalconChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", diff --git a/test/standalone/chatWrappers/GeneralChatPromptWrapper.test.ts b/test/standalone/chatWrappers/GeneralChatPromptWrapper.test.ts index 79235cdc..6a38df4c 100644 --- a/test/standalone/chatWrappers/GeneralChatPromptWrapper.test.ts +++ b/test/standalone/chatWrappers/GeneralChatPromptWrapper.test.ts @@ -43,7 +43,7 @@ describe("GeneralChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -70,7 +70,7 @@ describe("GeneralChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", " ", @@ -116,7 +116,7 @@ describe("GeneralChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -140,7 +140,7 @@ describe("GeneralChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -177,7 +177,7 @@ describe("GeneralChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -207,7 +207,7 @@ describe("GeneralChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", diff --git a/test/standalone/chatWrappers/LlamaChatPromptWrapper.test.ts b/test/standalone/chatWrappers/LlamaChatPromptWrapper.test.ts index d0c2e228..7bb478ca 100644 --- a/test/standalone/chatWrappers/LlamaChatPromptWrapper.test.ts +++ b/test/standalone/chatWrappers/LlamaChatPromptWrapper.test.ts @@ -52,7 +52,7 @@ describe("LlamaChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": " @@ -91,7 +91,7 @@ describe("LlamaChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. 
- If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": " @@ -159,7 +159,7 @@ describe("LlamaChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": " @@ -195,7 +195,7 @@ describe("LlamaChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": " From bab2034fd4be60827adb204a800af28d492a4b1b Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 21:37:16 +0200 Subject: [PATCH 08/13] test: improve function calling tests --- .../functionary/functions.test.ts | 63 ++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/test/modelDependent/functionary/functions.test.ts b/test/modelDependent/functionary/functions.test.ts index dd7e6146..03dd33f0 100644 --- a/test/modelDependent/functionary/functions.test.ts +++ b/test/modelDependent/functionary/functions.test.ts @@ -1,11 +1,11 @@ import {describe, expect, test} from "vitest"; -import {defineChatSessionFunction, LlamaChatSession, LlamaContext, LlamaModel} from "../../../src/index.js"; +import {defineChatSessionFunction, LlamaChatSession, LlamaContext, LlamaJsonSchemaGrammar, LlamaModel} from "../../../src/index.js"; import {getModelFile} from "../../utils/modelFiles.js"; import {getTestLlama} from "../../utils/getTestLlama.js"; describe("functionary", () => { describe("functions", () => { - test("get time", async () => { + test("get n-th word", async () => { const modelPath = await getModelFile("functionary-small-v2.2.q4_0.gguf"); const llama = await getTestLlama(); @@ -45,4 +45,63 @@ describe("functionary", () => { timeout: 1000 * 60 * 60 * 2 }); }); + + describe("functions and grammar", () => { + test("get n-th word", async () => { + const modelPath = await getModelFile("functionary-small-v2.2.q4_0.gguf"); + const llama = await getTestLlama(); + + const model = new LlamaModel({ + llama, + modelPath + }); + const context = new LlamaContext({ + model, + contextSize: 4096 + }); + const chatSession = new LlamaChatSession({ + contextSequence: context.getSequence() + }); + + const res = await chatSession.prompt("What is the second word?", { + functions: { + getNthWord: defineChatSessionFunction({ + description: "Get an n-th 
word", + params: { + type: "object", + properties: { + n: { + enum: [1, 2, 3, 4] + } + } + }, + handler(params) { + return ["very", "secret", "this", "hello"][params.n - 1]; + } + }) + } + }); + + expect(res).to.be.eq('The second word is "secret".'); + + const res2SchemaGrammar = new LlamaJsonSchemaGrammar(llama, { + type: "object", + properties: { + word: { + type: "string" + } + } + }); + + const res2 = await chatSession.prompt("Repeat your response", { + grammar: res2SchemaGrammar + }); + + const parsedRes2 = res2SchemaGrammar.parse(res2); + + expect(parsedRes2).to.eql({word: "secret"}); + }, { + timeout: 1000 * 60 * 60 * 2 + }); + }); }); From 827e1204c4be998b69e2622b38a5a4e6f2e90787 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 22:22:41 +0200 Subject: [PATCH 09/13] feat: use Vulkan GPU info for Kompute --- llama/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt index 5dc5e66a..82b62fc5 100644 --- a/llama/CMakeLists.txt +++ b/llama/CMakeLists.txt @@ -58,10 +58,14 @@ if (LLAMA_CUBLAS) endif() endif() -if (LLAMA_VULKAN) +if (LLAMA_VULKAN OR LLAMA_KOMPUTE) find_package(Vulkan) if (Vulkan_FOUND) - message(STATUS "Using Vulkan for GPU info") + if (LLAMA_VULKAN) + message(STATUS "Using Vulkan for GPU info") + elseif (LLAMA_KOMPUTE) + message(STATUS "Using Vulkan for GPU info because Kompute is enabled") + endif() set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/vulkan-gpu-info.h) set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/vulkan-gpu-info.cpp) From 0694c241c2b45cd8d47530b41ab0156406fda106 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 22:23:34 +0200 Subject: [PATCH 10/13] build: Vulkan prebuilds --- .github/workflows/build.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3f1a302b..7465398a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -116,6 +116,24 @@ jobs: cuda: '12.2.0' method: 'network' + - name: Install Vulkan SDK on Windows + if: startsWith(matrix.config.os, 'windows') + env: + VULKAN_VERSION: 1.3.261.1 + run: | + curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" + & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install + Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" + Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" + + - name: Install Vulkan SDK on Ubuntu + if: startsWith(matrix.config.name, 'Ubuntu GCC') + run: | + wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc + sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list + sudo apt update + sudo apt install vulkan-sdk + - name: Install dependencies on macOS if: startsWith(matrix.config.os, 'macos') run: | @@ -179,10 +197,12 @@ jobs: if (process.env.ARTIFACT_NAME === "win") { await buildBinary("x64"); await buildBinary("x64", ["--cuda"]); + await buildBinary("x64", ["--vulkan"]); // await buildBinary("arm64", [], windowsOnArmNodeVersion); // disabled arm64 for now as compilation doesn't work } else if (process.env.ARTIFACT_NAME === "linux") { await buildBinary("x64"); await buildBinary("x64", ["--cuda"]); + await buildBinary("x64", ["--vulkan"]); await 
buildBinary("arm64"); await buildBinary("armv7l"); } else if (process.env.ARTIFACT_NAME === "mac") { From a71bdc986b75475ec61e27c183d70923ca35d058 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 22:24:37 +0200 Subject: [PATCH 11/13] build: run CI for PRs from forks --- .github/workflows/build.yml | 9 ++++++--- .github/workflows/prLint.yml | 1 + .github/workflows/test.yml | 9 ++++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7465398a..c5232040 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,7 +1,10 @@ name: Build on: push: - + branches: + - master + - beta + pull_request: workflow_dispatch: jobs: @@ -319,7 +322,7 @@ jobs: release: name: Release - if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta' + if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta') runs-on: ubuntu-latest concurrency: release-${{ github.ref }} environment: @@ -387,7 +390,7 @@ jobs: npm run docs:build - name: Upload docs to GitHub Pages if: steps.set-npm-url.outputs.npm-url != '' && github.ref == 'refs/heads/master' - uses: actions/upload-artifact@v4 + uses: actions/upload-pages-artifact@v3 with: name: pages-docs path: docs-site diff --git a/.github/workflows/prLint.yml b/.github/workflows/prLint.yml index dcd2213e..aedf09d6 100644 --- a/.github/workflows/prLint.yml +++ b/.github/workflows/prLint.yml @@ -3,6 +3,7 @@ on: pull_request: pull_request_target: types: [opened, reopened, edited, synchronize] + jobs: lint: name: Lint diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2ab89558..feeb345b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,5 +1,12 @@ name: Test -on: [push] +on: + push: + branches: + - master + - beta + pull_request: + workflow_dispatch: + jobs: test: name: Test From 4d67064f998a2afed782826b43bafac67bda4b34 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 22:56:46 +0200 Subject: [PATCH 12/13] fix: bug --- llama/gpuInfo/vulkan-gpu-info.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 554bad44..e95b0582 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -29,7 +29,7 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkan std::any_of( extensionProperties.begin(), extensionProperties.end(), - [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME;} + [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName.data()) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME;} ); if (memoryBudgetExtensionSupported) { From e800fc430849433e57ced6e74907e7ef2b0fd266 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 23:07:28 +0200 Subject: [PATCH 13/13] docs: mention that Vulkan support is experimental --- src/bindings/getLlama.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bindings/getLlama.ts b/src/bindings/getLlama.ts index 11685fd5..a14efac3 100644 --- a/src/bindings/getLlama.ts +++ b/src/bindings/getLlama.ts @@ -41,6 +41,7 @@ export type LlamaOptions = { /** * Toggle Vulkan support on llama.cpp. + * Currently, Vulkan support is experimental. Use with caution. * Disabled by default. */ vulkan?: boolean,