From 90f11edf0b233b4d0190cc47c64c26dcb6b6b30c Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 18:42:47 +0200 Subject: [PATCH 01/13] feat: add Vulkan support --- .github/ISSUE_TEMPLATE/bug-report.yml | 4 ++ README.md | 2 +- docs/index.md | 2 +- llama/CMakeLists.txt | 16 ++++++ llama/addon.cpp | 14 ++++++ llama/gpuInfo/vulkan-gpu-info.cpp | 50 +++++++++++++++++++ llama/gpuInfo/vulkan-gpu-info.h | 5 ++ package.json | 1 + src/bindings/Llama.ts | 10 +++- src/bindings/getLlama.ts | 15 +++++- src/bindings/types.ts | 3 +- src/bindings/utils/compileLLamaCpp.ts | 3 ++ .../getBuildFolderNameForBuildOptions.ts | 3 ++ .../utils/resolveCustomCmakeOptions.ts | 1 + src/cli/commands/BuildCommand.ts | 22 ++++---- src/cli/commands/DebugCommand.ts | 17 +++---- src/cli/commands/DownloadCommand.ts | 23 +++++---- src/cli/utils/logEnabledComputeLayers.ts | 21 ++++++++ src/config.ts | 3 ++ src/utils/getBuildDefaults.ts | 5 +- 20 files changed, 183 insertions(+), 37 deletions(-) create mode 100644 llama/gpuInfo/vulkan-gpu-info.cpp create mode 100644 llama/gpuInfo/vulkan-gpu-info.h create mode 100644 src/cli/utils/logEnabledComputeLayers.ts diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 3a7d80cc..cf75e5e3 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -76,8 +76,12 @@ body: required: false - label: CUDA support required: false + - label: Vulkan support + required: false - label: Grammar required: false + - label: Function calling + required: false - type: dropdown id: pr attributes: diff --git a/README.md b/README.md index b0a54ef9..d4e7bbaf 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ ## Features * Run a text generation model locally on your machine -* Metal and CUDA support +* Metal, CUDA and Vulkan support * Pre-built binaries are provided, with a fallback to building from source without `node-gyp` or Python * Chat with a model using a chat wrapper * Use the CLI to chat with a model without writing any code diff --git a/docs/index.md b/docs/index.md index 4cf9ba07..40484332 100644 --- a/docs/index.md +++ b/docs/index.md @@ -20,7 +20,7 @@ hero: features: - icon: 🚀 - title: Metal and CUDA support + title: Metal, CUDA and Vulkan support details: Utilize the power of your GPU to run AI models faster link: /guide/#cuda-and-metal-support linkText: Learn more diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt index a92404be..5dc5e66a 100644 --- a/llama/CMakeLists.txt +++ b/llama/CMakeLists.txt @@ -58,6 +58,22 @@ if (LLAMA_CUBLAS) endif() endif() +if (LLAMA_VULKAN) + find_package(Vulkan) + if (Vulkan_FOUND) + message(STATUS "Using Vulkan for GPU info") + + set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/vulkan-gpu-info.h) + set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/vulkan-gpu-info.cpp) + + add_compile_definitions(GPU_INFO_USE_VULKAN) + + set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} Vulkan::Vulkan) + else() + message(WARNING "Vulkan not found. 
Not using it for GPU info") + endif() +endif() + if (LLAMA_HIPBLAS) list(APPEND CMAKE_PREFIX_PATH /opt/rocm) diff --git a/llama/addon.cpp b/llama/addon.cpp index acb44ea1..1991a03b 100644 --- a/llama/addon.cpp +++ b/llama/addon.cpp @@ -12,6 +12,9 @@ #ifdef GPU_INFO_USE_CUBLAS # include "gpuInfo/cuda-gpu-info.h" #endif +#ifdef GPU_INFO_USE_VULKAN +# include "gpuInfo/vulkan-gpu-info.h" +#endif #ifdef GPU_INFO_USE_METAL # include "gpuInfo/metal-gpu-info.h" #endif @@ -71,6 +74,17 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { } #endif +#ifdef GPU_INFO_USE_VULKAN + uint64_t vulkanDeviceTotal = 0; + uint64_t vulkanDeviceUsed = 0; + const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed); + + if (vulkanDeviceSupportsMemoryBudgetExtension) { + total += vulkanDeviceTotal; + used += vulkanDeviceUsed; + } +#endif + #ifdef GPU_INFO_USE_METAL uint64_t metalDeviceTotal = 0; uint64_t metalDeviceUsed = 0; diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp new file mode 100644 index 00000000..4f4091a9 --- /dev/null +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -0,0 +1,50 @@ +#include +#include +// #include + +bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { + vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION); + vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {}); + vk::Instance instance = vk::createInstance(createInfo); + + auto physicalDevices = instance.enumeratePhysicalDevices(); + + size_t usedMem = 0; + size_t totalMem = 0; + + for (size_t i = 0; i < physicalDevices.size(); i++) { + vk::PhysicalDevice physicalDevice = physicalDevices[i]; + vk::PhysicalDeviceMemoryProperties memProps = physicalDevice.getMemoryProperties(); + + std::vector extensionProperties = physicalDevice.enumerateDeviceExtensionProperties(); + bool memoryBudgetExtensionSupported = std::any_of( + extensionProperties.begin(), + extensionProperties.end(), + [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME; } + ); + + if (memoryBudgetExtensionSupported) { + vk::PhysicalDeviceMemoryBudgetPropertiesEXT memoryBudgetProperties; + vk::PhysicalDeviceMemoryProperties2 memProps2 = {}; + memProps2.pNext = &memoryBudgetProperties; + + physicalDevice.getMemoryProperties2(&memProps2); + + for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) { + if (memProps.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) { + totalMem += memProps.memoryHeaps[i].size; + usedMem += memoryBudgetProperties.heapUsage[i]; + break; + } + } + } else { + // VK_EXT_memory_budget extension is not supported, se we cannot determine used memory + std::cerr << "VK_EXT_memory_budget extension not supported" << std::endl; + return false; + } + } + + *total = totalMem; + *used = usedMem; + return true; +} diff --git a/llama/gpuInfo/vulkan-gpu-info.h b/llama/gpuInfo/vulkan-gpu-info.h new file mode 100644 index 00000000..ccd1dfe3 --- /dev/null +++ b/llama/gpuInfo/vulkan-gpu-info.h @@ -0,0 +1,5 @@ +#pragma once + +#include + +bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used); diff --git a/package.json b/package.json index 4cd8de6a..b966142b 100644 --- a/package.json +++ b/package.json @@ -82,6 +82,7 @@ "gguf", "metal", "cuda", + "vulkan", "grammar", "json-grammar", "json-schema-grammar", diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts index d4fd2d71..7f2e6c26 100644 --- 
a/src/bindings/Llama.ts +++ b/src/bindings/Llama.ts @@ -20,6 +20,7 @@ export class Llama { /** @internal */ public readonly _bindings: BindingModule; /** @internal */ private readonly _metal: boolean; /** @internal */ private readonly _cuda: boolean; + /** @internal */ private readonly _vulkan: boolean; /** @internal */ private readonly _buildType: "localBuild" | "prebuilt"; /** @internal */ private readonly _cmakeOptions: Readonly>; /** @internal */ private readonly _llamaCppRelease: { @@ -36,11 +37,12 @@ export class Llama { /** @internal */ private _nextLogNeedNewLine: boolean = false; private constructor({ - bindings, metal, cuda, logLevel, logger, buildType, cmakeOptions, llamaCppRelease + bindings, metal, cuda, vulkan, logLevel, logger, buildType, cmakeOptions, llamaCppRelease }: { bindings: BindingModule metal: boolean, cuda: boolean, + vulkan: boolean, logLevel: LlamaLogLevel, logger: (level: LlamaLogLevel, message: string) => void, buildType: "localBuild" | "prebuilt", @@ -53,6 +55,7 @@ export class Llama { this._bindings = bindings; this._metal = metal; this._cuda = cuda; + this._vulkan = vulkan; this._logLevel = logLevel ?? LlamaLogLevel.debug; this._logger = logger; this._buildType = buildType; @@ -77,6 +80,10 @@ export class Llama { return this._cuda; } + public get vulkan() { + return this._vulkan; + } + public get logLevel() { return this._logLevel; } @@ -213,6 +220,7 @@ export class Llama { buildType, metal: buildMetadata.buildOptions.computeLayers.metal, cuda: buildMetadata.buildOptions.computeLayers.cuda, + vulkan: buildMetadata.buildOptions.computeLayers.vulkan, cmakeOptions: buildMetadata.buildOptions.customCmakeOptions, llamaCppRelease: { repo: buildMetadata.buildOptions.llamaCpp.repo, diff --git a/src/bindings/getLlama.ts b/src/bindings/getLlama.ts index e7816b5f..11685fd5 100644 --- a/src/bindings/getLlama.ts +++ b/src/bindings/getLlama.ts @@ -4,7 +4,7 @@ import console from "console"; import {createRequire} from "module"; import { builtinLlamaCppGitHubRepo, builtinLlamaCppRelease, defaultLlamaCppCudaSupport, defaultLlamaCppDebugLogs, defaultLlamaCppGitHubRepo, - defaultLlamaCppMetalSupport, defaultLlamaCppRelease, defaultSkipDownload, llamaLocalBuildBinsDirectory + defaultLlamaCppMetalSupport, defaultLlamaCppRelease, defaultLlamaCppVulkanSupport, defaultSkipDownload, llamaLocalBuildBinsDirectory } from "../config.js"; import {getConsoleLogPrefix} from "../utils/getConsoleLogPrefix.js"; import {waitForLockfileRelease} from "../utils/waitForLockfileRelease.js"; @@ -39,6 +39,12 @@ export type LlamaOptions = { */ cuda?: boolean, + /** + * Toggle Vulkan support on llama.cpp. + * Disabled by default. + */ + vulkan?: boolean, + /** * Set the minimum log level for llama.cpp. * Defaults to "debug". @@ -184,6 +190,7 @@ export async function getLlama(options?: LlamaOptions | "lastBuild", lastBuildOp export async function getLlamaForOptions({ metal = defaultLlamaCppMetalSupport, cuda = defaultLlamaCppCudaSupport, + vulkan = defaultLlamaCppVulkanSupport, logLevel = defaultLlamaCppDebugLogs, logger = Llama.defaultConsoleLogger, build = "auto", @@ -220,7 +227,8 @@ export async function getLlamaForOptions({ arch, computeLayers: { metal, - cuda + cuda, + vulkan }, llamaCpp: { repo: clonedLlamaCppRepoReleaseInfo?.llamaCppGithubRepo ?? 
builtinLlamaCppGitHubRepo, @@ -356,6 +364,9 @@ function describeBinary(binaryOptions: BuildOptions) { if (binaryOptions.computeLayers.cuda) additions.push("with CUDA support"); + if (binaryOptions.computeLayers.vulkan) + additions.push("with Vulkan support"); + if (binaryOptions.customCmakeOptions.size > 0) additions.push("with custom build options"); diff --git a/src/bindings/types.ts b/src/bindings/types.ts index c5beadcb..a5a89baa 100644 --- a/src/bindings/types.ts +++ b/src/bindings/types.ts @@ -8,7 +8,8 @@ export type BuildOptions = { arch: typeof process.arch, computeLayers: { metal: boolean, - cuda: boolean + cuda: boolean, + vulkan: boolean }, llamaCpp: { repo: string, diff --git a/src/bindings/utils/compileLLamaCpp.ts b/src/bindings/utils/compileLLamaCpp.ts index 24eace3a..d43767f3 100644 --- a/src/bindings/utils/compileLLamaCpp.ts +++ b/src/bindings/utils/compileLLamaCpp.ts @@ -64,6 +64,9 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, { if (buildOptions.computeLayers.cuda && !cmakeCustomOptions.has("LLAMA_CUBLAS")) cmakeCustomOptions.set("LLAMA_CUBLAS", "1"); + if (buildOptions.computeLayers.vulkan && !cmakeCustomOptions.has("LLAMA_VULKAN")) + cmakeCustomOptions.set("LLAMA_VULKAN", "1"); + if (toolchainFile != null && !cmakeCustomOptions.has("CMAKE_TOOLCHAIN_FILE")) cmakeCustomOptions.set("CMAKE_TOOLCHAIN_FILE", toolchainFile); diff --git a/src/bindings/utils/getBuildFolderNameForBuildOptions.ts b/src/bindings/utils/getBuildFolderNameForBuildOptions.ts index 36bb850f..64ea524d 100644 --- a/src/bindings/utils/getBuildFolderNameForBuildOptions.ts +++ b/src/bindings/utils/getBuildFolderNameForBuildOptions.ts @@ -10,6 +10,9 @@ export async function getBuildFolderNameForBuildOptions(buildOptions: BuildOptio else if (buildOptions.computeLayers.cuda) nameParts.push("cuda"); + if (buildOptions.computeLayers.vulkan) + nameParts.push("vulkan"); + if (buildOptions.llamaCpp.repo !== builtinLlamaCppGitHubRepo || buildOptions.llamaCpp.release !== builtinLlamaCppRelease) nameParts.push("release-" + await getFolderNamePartForRelease(buildOptions.llamaCpp.repo, buildOptions.llamaCpp.release)); diff --git a/src/bindings/utils/resolveCustomCmakeOptions.ts b/src/bindings/utils/resolveCustomCmakeOptions.ts index 1d269bd5..f9a11006 100644 --- a/src/bindings/utils/resolveCustomCmakeOptions.ts +++ b/src/bindings/utils/resolveCustomCmakeOptions.ts @@ -8,6 +8,7 @@ export function resolveCustomCmakeOptions(customCmakeOptions?: Record = { default: defaultLlamaCppCudaSupport, description: "Compile llama.cpp with CUDA support. Can also be set via the NODE_LLAMA_CPP_CUDA environment variable" }) + .option("vulkan", { + type: "boolean", + default: defaultLlamaCppVulkanSupport, + description: "Compile llama.cpp with Vulkan support. 
Can also be set via the NODE_LLAMA_CPP_VULKAN environment variable" + }) .option("noUsageExample", { alias: "nu", type: "boolean", @@ -75,6 +83,7 @@ export async function BuildLlamaCppCommand({ nodeTarget = undefined, metal = defaultLlamaCppMetalSupport, cuda = defaultLlamaCppCudaSupport, + vulkan = defaultLlamaCppVulkanSupport, noUsageExample = false, noCustomCmakeBuildOptionsInBinaryFolderName = false }: BuildCommand) { @@ -90,13 +99,7 @@ export async function BuildLlamaCppCommand({ const platform = getPlatform(); const customCmakeOptions = resolveCustomCmakeOptions(); - if (metal && process.platform === "darwin") { - console.log(`${chalk.yellow("Metal:")} enabled`); - } - - if (cuda) { - console.log(`${chalk.yellow("CUDA:")} enabled`); - } + logEnabledComputeLayers({metal, cuda, vulkan}, {platform}); await downloadCmakeIfNeeded(true); @@ -109,7 +112,8 @@ export async function BuildLlamaCppCommand({ : process.arch, computeLayers: { metal, - cuda + cuda, + vulkan }, llamaCpp: { repo: clonedLlamaCppRepoReleaseInfo?.llamaCppGithubRepo ?? builtinLlamaCppGitHubRepo, diff --git a/src/cli/commands/DebugCommand.ts b/src/cli/commands/DebugCommand.ts index 20a90649..e4c99657 100644 --- a/src/cli/commands/DebugCommand.ts +++ b/src/cli/commands/DebugCommand.ts @@ -5,6 +5,7 @@ import chalk from "chalk"; import {getLlama} from "../../bindings/getLlama.js"; import {Llama} from "../../bindings/Llama.js"; import {prettyPrintObject} from "../../utils/prettyPrintObject.js"; +import {logEnabledComputeLayers} from "../utils/logEnabledComputeLayers.js"; const debugFunctions = ["vram", "cmakeOptions"] as const; type DebugCommand = { @@ -59,17 +60,13 @@ async function DebugCmakeOptionsFunction() { } function logComputeLayers(llama: Llama) { - let hasEnabledLayers = false; + logEnabledComputeLayers({ + metal: llama.metal, + cuda: llama.cuda, + vulkan: llama.vulkan + }); - if (llama.metal) { - console.info(`${chalk.yellow("Metal:")} enabled`); - hasEnabledLayers = true; - } - - if (llama.cuda) { - console.info(`${chalk.yellow("Metal:")} enabled`); - hasEnabledLayers = true; - } + const hasEnabledLayers = llama.metal || llama.cuda || llama.vulkan; if (hasEnabledLayers) console.info(); diff --git a/src/cli/commands/DownloadCommand.ts b/src/cli/commands/DownloadCommand.ts index 7ae6c004..fe654028 100644 --- a/src/cli/commands/DownloadCommand.ts +++ b/src/cli/commands/DownloadCommand.ts @@ -3,8 +3,8 @@ import {CommandModule} from "yargs"; import fs from "fs-extra"; import chalk from "chalk"; import { - defaultLlamaCppCudaSupport, defaultLlamaCppGitHubRepo, defaultLlamaCppMetalSupport, defaultLlamaCppRelease, isCI, llamaCppDirectory, - llamaCppDirectoryInfoFilePath + defaultLlamaCppCudaSupport, defaultLlamaCppVulkanSupport, defaultLlamaCppGitHubRepo, defaultLlamaCppMetalSupport, + defaultLlamaCppRelease, isCI, llamaCppDirectory, llamaCppDirectoryInfoFilePath } from "../../config.js"; import {compileLlamaCpp} from "../../bindings/utils/compileLLamaCpp.js"; import withOra from "../../utils/withOra.js"; @@ -20,6 +20,7 @@ import {resolveCustomCmakeOptions} from "../../bindings/utils/resolveCustomCmake import {logBinaryUsageExampleToConsole} from "../../bindings/utils/logBinaryUsageExampleToConsole.js"; import {resolveGithubRelease} from "../../utils/resolveGithubRelease.js"; import {BuildOptions} from "../../bindings/types.js"; +import {logEnabledComputeLayers} from "../utils/logEnabledComputeLayers.js"; type DownloadCommandArgs = { repo?: string, @@ -28,6 +29,7 @@ type DownloadCommandArgs = { nodeTarget?: string, metal?: 
boolean, cuda?: boolean, + vulkan?: boolean, skipBuild?: boolean, noBundle?: boolean, noUsageExample?: boolean, @@ -74,6 +76,11 @@ export const DownloadCommand: CommandModule = { default: defaultLlamaCppCudaSupport, description: "Compile llama.cpp with CUDA support. Can also be set via the NODE_LLAMA_CPP_CUDA environment variable" }) + .option("vulkan", { + type: "boolean", + default: defaultLlamaCppVulkanSupport, + description: "Compile llama.cpp with Vulkan support. Can also be set via the NODE_LLAMA_CPP_VULKAN environment variable" + }) .option("skipBuild", { alias: "sb", type: "boolean", @@ -110,6 +117,7 @@ export async function DownloadLlamaCppCommand({ nodeTarget = undefined, metal = defaultLlamaCppMetalSupport, cuda = defaultLlamaCppCudaSupport, + vulkan = defaultLlamaCppVulkanSupport, skipBuild = false, noBundle = false, noUsageExample = false, @@ -123,13 +131,7 @@ export async function DownloadLlamaCppCommand({ console.log(`${chalk.yellow("Repo:")} ${repo}`); console.log(`${chalk.yellow("Release:")} ${release}`); if (!skipBuild) { - if (metal && platform === "mac") { - console.log(`${chalk.yellow("Metal:")} enabled`); - } - - if (cuda) { - console.log(`${chalk.yellow("CUDA:")} enabled`); - } + logEnabledComputeLayers({metal, cuda, vulkan}, {platform}); } console.log(); @@ -169,7 +171,8 @@ export async function DownloadLlamaCppCommand({ : process.arch, computeLayers: { metal, - cuda + cuda, + vulkan }, llamaCpp: { repo, diff --git a/src/cli/utils/logEnabledComputeLayers.ts b/src/cli/utils/logEnabledComputeLayers.ts new file mode 100644 index 00000000..5ac55d12 --- /dev/null +++ b/src/cli/utils/logEnabledComputeLayers.ts @@ -0,0 +1,21 @@ +import chalk from "chalk"; +import {BinaryPlatform, getPlatform} from "../../bindings/utils/getPlatform.js"; + +export function logEnabledComputeLayers({ + metal, cuda, vulkan +}: { + metal: boolean, cuda: boolean, vulkan: boolean +}, { + platform = getPlatform() +}: { + platform?: BinaryPlatform +} = {}) { + if (metal && platform === "mac") + console.log(`${chalk.yellow("Metal:")} enabled`); + + if (cuda) + console.log(`${chalk.yellow("CUDA:")} enabled`); + + if (vulkan) + console.log(`${chalk.yellow("Vulkan:")} enabled`); +} diff --git a/src/config.ts b/src/config.ts index 65164a66..267675a5 100644 --- a/src/config.ts +++ b/src/config.ts @@ -51,6 +51,9 @@ export const defaultLlamaCppMetalSupport = env.get("NODE_LLAMA_CPP_METAL") export const defaultLlamaCppCudaSupport = env.get("NODE_LLAMA_CPP_CUDA") .default("false") .asBool(); +export const defaultLlamaCppVulkanSupport = env.get("NODE_LLAMA_CPP_VULKAN") + .default("false") + .asBool(); export const defaultLlamaCppDebugLogs = env.get("NODE_LLAMA_CPP_LOG_LEVEL") .default(LlamaLogLevel.debug) .asEnum(LlamaLogLevelValues); diff --git a/src/utils/getBuildDefaults.ts b/src/utils/getBuildDefaults.ts index eb18ce49..73bc64e1 100644 --- a/src/utils/getBuildDefaults.ts +++ b/src/utils/getBuildDefaults.ts @@ -1,5 +1,5 @@ import { - defaultLlamaCppCudaSupport, defaultLlamaCppGitHubRepo, defaultLlamaCppMetalSupport, defaultLlamaCppRelease + defaultLlamaCppCudaSupport, defaultLlamaCppGitHubRepo, defaultLlamaCppMetalSupport, defaultLlamaCppRelease, defaultLlamaCppVulkanSupport } from "../config.js"; export async function getBuildDefaults() { @@ -7,6 +7,7 @@ export async function getBuildDefaults() { repo: defaultLlamaCppGitHubRepo, release: defaultLlamaCppRelease, metalSupport: defaultLlamaCppMetalSupport, - cudaSupport: defaultLlamaCppCudaSupport + cudaSupport: defaultLlamaCppCudaSupport, + vulkanSupport: 
defaultLlamaCppVulkanSupport }; } From cd2b09e8e0a2f9ef10bf0c7806db408bb0c60d43 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 19:04:00 +0200 Subject: [PATCH 02/13] fix: Vulkan GPU info build --- llama/gpuInfo/vulkan-gpu-info.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 4f4091a9..119794fa 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -1,6 +1,5 @@ #include -#include -// #include +#include bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION); From eed12dd1f7614fa7689e955c740b7f883204e248 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 19:12:59 +0200 Subject: [PATCH 03/13] fix: Vulkan GPU info build --- llama/gpuInfo/vulkan-gpu-info.cpp | 17 +++++++++-------- src/cli/commands/DebugCommand.ts | 15 +++++++++++---- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 119794fa..6b836a87 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -1,11 +1,12 @@ #include + #include bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { - vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION); + vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2); vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {}); vk::Instance instance = vk::createInstance(createInfo); - + auto physicalDevices = instance.enumeratePhysicalDevices(); size_t usedMem = 0; @@ -16,11 +17,10 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { vk::PhysicalDeviceMemoryProperties memProps = physicalDevice.getMemoryProperties(); std::vector extensionProperties = physicalDevice.enumerateDeviceExtensionProperties(); - bool memoryBudgetExtensionSupported = std::any_of( - extensionProperties.begin(), - extensionProperties.end(), - [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME; } - ); + bool memoryBudgetExtensionSupported = + std::any_of(extensionProperties.begin(), extensionProperties.end(), [](const vk::ExtensionProperties& ext) { + return std::string(ext.extensionName) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME; + }); if (memoryBudgetExtensionSupported) { vk::PhysicalDeviceMemoryBudgetPropertiesEXT memoryBudgetProperties; @@ -38,7 +38,8 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { } } else { // VK_EXT_memory_budget extension is not supported, se we cannot determine used memory - std::cerr << "VK_EXT_memory_budget extension not supported" << std::endl; + fputs("VK_EXT_memory_budget extension not supported", stderr); + fflush(stderr); return false; } } diff --git a/src/cli/commands/DebugCommand.ts b/src/cli/commands/DebugCommand.ts index e4c99657..669e6d55 100644 --- a/src/cli/commands/DebugCommand.ts +++ b/src/cli/commands/DebugCommand.ts @@ -44,11 +44,18 @@ async function DebugVramFunction() { logComputeLayers(llama); - console.info(`${chalk.yellow("Used VRAM:")} ${Math.ceil((vramStatus.used / vramStatus.total) * 100 * 100) / 100}% ${chalk.grey("(" + bytes(vramStatus.used) + "/" + bytes(vramStatus.total) + ")")}`); - console.info(`${chalk.yellow("Free VRAM:")} ${Math.floor((vramStatus.free / vramStatus.total) * 100 * 100) / 
100}% ${chalk.grey("(" + bytes(vramStatus.free) + "/" + bytes(vramStatus.total) + ")")}`); + const getPercentageString = (amount: number, total: number) => { + if (total === 0) + return "0"; + + return String(Math.floor((amount / total) * 100 * 100) / 100); + }; + + console.info(`${chalk.yellow("Used VRAM:")} ${getPercentageString(vramStatus.used, vramStatus.total)}% ${chalk.grey("(" + bytes(vramStatus.used) + "/" + bytes(vramStatus.total) + ")")}`); + console.info(`${chalk.yellow("Free VRAM:")} ${getPercentageString(vramStatus.free, vramStatus.total)}% ${chalk.grey("(" + bytes(vramStatus.free) + "/" + bytes(vramStatus.total) + ")")}`); console.info(); - console.info(`${chalk.yellow("Used RAM:")} ${Math.ceil((usedMemory / totalMemory) * 100 * 100) / 100}% ${chalk.grey("(" + bytes(usedMemory) + "/" + bytes(totalMemory) + ")")}`); - console.info(`${chalk.yellow("Free RAM:")} ${Math.floor((freeMemory / totalMemory) * 100 * 100) / 100}% ${chalk.grey("(" + bytes(freeMemory) + "/" + bytes(totalMemory) + ")")}`); + console.info(`${chalk.yellow("Used RAM:")} ${getPercentageString(usedMemory, totalMemory)}% ${chalk.grey("(" + bytes(usedMemory) + "/" + bytes(totalMemory) + ")")}`); + console.info(`${chalk.yellow("Free RAM:")} ${getPercentageString(freeMemory, totalMemory)}% ${chalk.grey("(" + bytes(freeMemory) + "/" + bytes(totalMemory) + ")")}`); } async function DebugCmakeOptionsFunction() { From 33c1be64915a8670b678c2564356eb6bf2f9809e Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 19:32:22 +0200 Subject: [PATCH 04/13] fix: Vulkan GPU info build --- llama/gpuInfo/vulkan-gpu-info.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 6b836a87..b561a093 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -15,6 +15,12 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { for (size_t i = 0; i < physicalDevices.size(); i++) { vk::PhysicalDevice physicalDevice = physicalDevices[i]; vk::PhysicalDeviceMemoryProperties memProps = physicalDevice.getMemoryProperties(); + vk::PhysicalDeviceProperties deviceProps = physicalDevice.getProperties(); + + if (deviceProps.deviceType == vk::PhysicalDeviceType::eCpu) { + // ignore CPU devices, as we don't want to count RAM from the CPU as VRAM + continue; + } std::vector extensionProperties = physicalDevice.enumerateDeviceExtensionProperties(); bool memoryBudgetExtensionSupported = @@ -37,7 +43,7 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { } } } else { - // VK_EXT_memory_budget extension is not supported, se we cannot determine used memory + // VK_EXT_memory_budget extension is not supported, so we cannot determine used memory fputs("VK_EXT_memory_budget extension not supported", stderr); fflush(stderr); return false; From 319fedeaaf7b9b54877f90612134b75cafbf33a7 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 19:47:53 +0200 Subject: [PATCH 05/13] feat: improve Vulkan warning log --- llama/addon.cpp | 11 ++++++++--- llama/gpuInfo/cuda-gpu-info.cu | 10 +++++----- llama/gpuInfo/cuda-gpu-info.h | 4 ++-- llama/gpuInfo/vulkan-gpu-info.cpp | 21 +++++++++++++++------ llama/gpuInfo/vulkan-gpu-info.h | 4 +++- 5 files changed, 33 insertions(+), 17 deletions(-) diff --git a/llama/addon.cpp b/llama/addon.cpp index 1991a03b..aa2c0670 100644 --- a/llama/addon.cpp +++ b/llama/addon.cpp @@ -54,10 +54,15 @@ std::string addon_model_token_to_piece(const struct llama_model* 
model, llama_to } #ifdef GPU_INFO_USE_CUBLAS -void lodCudaError(const char* message) { +void logCudaError(const char* message) { addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr); } #endif +#ifdef GPU_INFO_USE_VULKAN +void logVulkanWarning(const char* message) { + addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr); +} +#endif Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { uint64_t total = 0; @@ -66,7 +71,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { #ifdef GPU_INFO_USE_CUBLAS size_t cudaDeviceTotal = 0; size_t cudaDeviceUsed = 0; - bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, lodCudaError); + bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError); if (cudeGetInfoSuccess) { total += cudaDeviceTotal; @@ -77,7 +82,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { #ifdef GPU_INFO_USE_VULKAN uint64_t vulkanDeviceTotal = 0; uint64_t vulkanDeviceUsed = 0; - const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed); + const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning); if (vulkanDeviceSupportsMemoryBudgetExtension) { total += vulkanDeviceTotal; diff --git a/llama/gpuInfo/cuda-gpu-info.cu b/llama/gpuInfo/cuda-gpu-info.cu index c0565ad6..62a9bd89 100644 --- a/llama/gpuInfo/cuda-gpu-info.cu +++ b/llama/gpuInfo/cuda-gpu-info.cu @@ -15,9 +15,9 @@ #endif -typedef void (*gpuInfoErrorLogCallback_t)(const char* message); +typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message); -bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) { +bool gpuInfoSetCudaDevice(const int device, gpuInfoCudaErrorLogCallback_t errorLogCallback) { int current_device; auto getDeviceResult = cudaGetDevice(¤t_device); @@ -40,7 +40,7 @@ bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCa return true; } -bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) { +bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) { gpuInfoSetCudaDevice(device, errorLogCallback); size_t freeMem; @@ -58,7 +58,7 @@ bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfo return true; } -int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) { +int gpuInfoGetCudaDeviceCount(gpuInfoCudaErrorLogCallback_t errorLogCallback) { int deviceCount; auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount); @@ -70,7 +70,7 @@ int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) { return deviceCount; } -bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) { +bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) { int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback); if (deviceCount < 0) { diff --git a/llama/gpuInfo/cuda-gpu-info.h b/llama/gpuInfo/cuda-gpu-info.h index 25dcd0df..dfd0bbdd 100644 --- a/llama/gpuInfo/cuda-gpu-info.h +++ b/llama/gpuInfo/cuda-gpu-info.h @@ -2,6 +2,6 @@ #include -typedef void 
(*gpuInfoErrorLogCallback_t)(const char* message); +typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message); -bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback); +bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback); diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index b561a093..554bad44 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -2,7 +2,9 @@ #include -bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { +typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message); + +bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback) { vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2); vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {}); vk::Instance instance = vk::createInstance(createInfo); @@ -24,9 +26,11 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { std::vector extensionProperties = physicalDevice.enumerateDeviceExtensionProperties(); bool memoryBudgetExtensionSupported = - std::any_of(extensionProperties.begin(), extensionProperties.end(), [](const vk::ExtensionProperties& ext) { - return std::string(ext.extensionName) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME; - }); + std::any_of( + extensionProperties.begin(), + extensionProperties.end(), + [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME;} + ); if (memoryBudgetExtensionSupported) { vk::PhysicalDeviceMemoryBudgetPropertiesEXT memoryBudgetProperties; @@ -44,8 +48,13 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used) { } } else { // VK_EXT_memory_budget extension is not supported, so we cannot determine used memory - fputs("VK_EXT_memory_budget extension not supported", stderr); - fflush(stderr); + warningLogCallback( + ( + "Vulkan VK_EXT_memory_budget extension not supported for device \"" + + std::string(deviceProps.deviceName.data()) + "\", so VRAM info cannot be determained for it" + ) + .c_str() + ); return false; } } diff --git a/llama/gpuInfo/vulkan-gpu-info.h b/llama/gpuInfo/vulkan-gpu-info.h index ccd1dfe3..6a2fbe40 100644 --- a/llama/gpuInfo/vulkan-gpu-info.h +++ b/llama/gpuInfo/vulkan-gpu-info.h @@ -2,4 +2,6 @@ #include -bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used); +typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message); + +bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback); From 4f5d901fc352d5214a30380fcdd670194cdc9ceb Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 21:19:07 +0200 Subject: [PATCH 06/13] fix: free llama backend when garbage collected --- llama/addon.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/llama/addon.cpp b/llama/addon.cpp index aa2c0670..91a716de 100644 --- a/llama/addon.cpp +++ b/llama/addon.cpp @@ -38,6 +38,7 @@ using AddonThreadSafeLogCallbackFunction = AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback; bool addonJsLoggerCallbackSet = false; int addonLoggerLogLevel = 5; +bool backendInitialized = false; std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) { std::vector result(8, 0); @@ -969,7 +970,7 @@ void 
addonCallJsLogCallback( called = false; } } - + if (!called && data != nullptr) { if (data->logLevelNumber == 2) { fputs(data->stringStream->str().c_str(), stderr); @@ -1065,8 +1066,17 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) { return info.Env().Undefined(); } +static void addonFreeLlamaBackend(Napi::Env env, int* data) { + if (backendInitialized) { + llama_backend_free(); + backendInitialized = false; + } +} + Napi::Object registerCallback(Napi::Env env, Napi::Object exports) { llama_backend_init(); + backendInitialized = true; + exports.DefineProperties({ Napi::PropertyDescriptor::Function("systemInfo", systemInfo), Napi::PropertyDescriptor::Function("setLogger", setLogger), @@ -1080,6 +1090,8 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) { llama_log_set(addonLlamaCppLogCallback, nullptr); + exports.AddFinalizer(addonFreeLlamaBackend, static_cast(nullptr)); + return exports; } From ffaaaa38a136ab82e20c6badc26819198360eb83 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 21:36:27 +0200 Subject: [PATCH 07/13] fix: default system prompt --- docs/guide/chat-prompt-wrapper.md | 4 ++-- src/config.ts | 4 ++-- .../chatWrappers/ChatMLChatPromptWrapper.test.ts | 8 ++++---- .../chatWrappers/FalconChatPromptWrapper.test.ts | 8 ++++---- .../chatWrappers/GeneralChatPromptWrapper.test.ts | 12 ++++++------ .../chatWrappers/LlamaChatPromptWrapper.test.ts | 8 ++++---- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/docs/guide/chat-prompt-wrapper.md b/docs/guide/chat-prompt-wrapper.md index 3bb3e69e..bee16add 100644 --- a/docs/guide/chat-prompt-wrapper.md +++ b/docs/guide/chat-prompt-wrapper.md @@ -7,8 +7,8 @@ and parse its response to know whether it finished answering, or should we tell For example, to prompt a model with "Where do llamas come from?" we can give the model a text like this to predict the completion of: ```txt You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. -If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. -If you don't know the answer to a question, please don't share false information. +If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. +If you don't know the answer to a question, don't share false information. ### Human Where do llamas come from? diff --git a/src/config.ts b/src/config.ts index 267675a5..43430bee 100644 --- a/src/config.ts +++ b/src/config.ts @@ -68,8 +68,8 @@ export const defaultXpacksCacheDirectory = env.get("NODE_LLAMA_CPP_XPACKS_CACHE_ .asString(); export const customCmakeOptionsEnvVarPrefix = "NODE_LLAMA_CPP_CMAKE_OPTION_"; export const defaultChatSystemPrompt = "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible.\n" + - "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. " + - "If you don't know the answer to a question, please don't share false information."; + "If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
" + + "If you don't know the answer to a question, don't share false information."; export const cliBinName = "node-llama-cpp"; export const npxRunPrefix = "npx --no "; diff --git a/test/standalone/chatWrappers/ChatMLChatPromptWrapper.test.ts b/test/standalone/chatWrappers/ChatMLChatPromptWrapper.test.ts index ce2a8061..8e5ac264 100644 --- a/test/standalone/chatWrappers/ChatMLChatPromptWrapper.test.ts +++ b/test/standalone/chatWrappers/ChatMLChatPromptWrapper.test.ts @@ -43,7 +43,7 @@ describe("ChatMLChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": "<|im_end|> @@ -79,7 +79,7 @@ describe("ChatMLChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": "<|im_end|> @@ -143,7 +143,7 @@ describe("ChatMLChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": "<|im_end|> @@ -176,7 +176,7 @@ describe("ChatMLChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": "<|im_end|> diff --git a/test/standalone/chatWrappers/FalconChatPromptWrapper.test.ts b/test/standalone/chatWrappers/FalconChatPromptWrapper.test.ts index c002ec22..3aaee42d 100644 --- a/test/standalone/chatWrappers/FalconChatPromptWrapper.test.ts +++ b/test/standalone/chatWrappers/FalconChatPromptWrapper.test.ts @@ -43,7 +43,7 @@ describe("FalconChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -68,7 +68,7 @@ describe("FalconChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -110,7 +110,7 @@ describe("FalconChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -132,7 +132,7 @@ describe("FalconChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", diff --git a/test/standalone/chatWrappers/GeneralChatPromptWrapper.test.ts b/test/standalone/chatWrappers/GeneralChatPromptWrapper.test.ts index 79235cdc..6a38df4c 100644 --- a/test/standalone/chatWrappers/GeneralChatPromptWrapper.test.ts +++ b/test/standalone/chatWrappers/GeneralChatPromptWrapper.test.ts @@ -43,7 +43,7 @@ describe("GeneralChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -70,7 +70,7 @@ describe("GeneralChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", " ", @@ -116,7 +116,7 @@ describe("GeneralChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -140,7 +140,7 @@ describe("GeneralChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -177,7 +177,7 @@ describe("GeneralChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", @@ -207,7 +207,7 @@ describe("GeneralChatWrapper", () => { "value": "BOS", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", " ", diff --git a/test/standalone/chatWrappers/LlamaChatPromptWrapper.test.ts b/test/standalone/chatWrappers/LlamaChatPromptWrapper.test.ts index d0c2e228..7bb478ca 100644 --- a/test/standalone/chatWrappers/LlamaChatPromptWrapper.test.ts +++ b/test/standalone/chatWrappers/LlamaChatPromptWrapper.test.ts @@ -52,7 +52,7 @@ describe("LlamaChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": " @@ -91,7 +91,7 @@ describe("LlamaChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. 
- If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": " @@ -159,7 +159,7 @@ describe("LlamaChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": " @@ -195,7 +195,7 @@ describe("LlamaChatWrapper", () => { ", }, "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. - If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", { "type": "specialToken", "value": " From bab2034fd4be60827adb204a800af28d492a4b1b Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 21:37:16 +0200 Subject: [PATCH 08/13] test: improve function calling tests --- .../functionary/functions.test.ts | 63 ++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/test/modelDependent/functionary/functions.test.ts b/test/modelDependent/functionary/functions.test.ts index dd7e6146..03dd33f0 100644 --- a/test/modelDependent/functionary/functions.test.ts +++ b/test/modelDependent/functionary/functions.test.ts @@ -1,11 +1,11 @@ import {describe, expect, test} from "vitest"; -import {defineChatSessionFunction, LlamaChatSession, LlamaContext, LlamaModel} from "../../../src/index.js"; +import {defineChatSessionFunction, LlamaChatSession, LlamaContext, LlamaJsonSchemaGrammar, LlamaModel} from "../../../src/index.js"; import {getModelFile} from "../../utils/modelFiles.js"; import {getTestLlama} from "../../utils/getTestLlama.js"; describe("functionary", () => { describe("functions", () => { - test("get time", async () => { + test("get n-th word", async () => { const modelPath = await getModelFile("functionary-small-v2.2.q4_0.gguf"); const llama = await getTestLlama(); @@ -45,4 +45,63 @@ describe("functionary", () => { timeout: 1000 * 60 * 60 * 2 }); }); + + describe("functions and grammar", () => { + test("get n-th word", async () => { + const modelPath = await getModelFile("functionary-small-v2.2.q4_0.gguf"); + const llama = await getTestLlama(); + + const model = new LlamaModel({ + llama, + modelPath + }); + const context = new LlamaContext({ + model, + contextSize: 4096 + }); + const chatSession = new LlamaChatSession({ + contextSequence: context.getSequence() + }); + + const res = await chatSession.prompt("What is the second word?", { + functions: { + getNthWord: defineChatSessionFunction({ + description: "Get an n-th 
word", + params: { + type: "object", + properties: { + n: { + enum: [1, 2, 3, 4] + } + } + }, + handler(params) { + return ["very", "secret", "this", "hello"][params.n - 1]; + } + }) + } + }); + + expect(res).to.be.eq('The second word is "secret".'); + + const res2SchemaGrammar = new LlamaJsonSchemaGrammar(llama, { + type: "object", + properties: { + word: { + type: "string" + } + } + }); + + const res2 = await chatSession.prompt("Repeat your response", { + grammar: res2SchemaGrammar + }); + + const parsedRes2 = res2SchemaGrammar.parse(res2); + + expect(parsedRes2).to.eql({word: "secret"}); + }, { + timeout: 1000 * 60 * 60 * 2 + }); + }); }); From 827e1204c4be998b69e2622b38a5a4e6f2e90787 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 22:22:41 +0200 Subject: [PATCH 09/13] feat: use Vulkan GPU info for Kompute --- llama/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt index 5dc5e66a..82b62fc5 100644 --- a/llama/CMakeLists.txt +++ b/llama/CMakeLists.txt @@ -58,10 +58,14 @@ if (LLAMA_CUBLAS) endif() endif() -if (LLAMA_VULKAN) +if (LLAMA_VULKAN OR LLAMA_KOMPUTE) find_package(Vulkan) if (Vulkan_FOUND) - message(STATUS "Using Vulkan for GPU info") + if (LLAMA_VULKAN) + message(STATUS "Using Vulkan for GPU info") + elseif (LLAMA_KOMPUTE) + message(STATUS "Using Vulkan for GPU info because Kompute is enabled") + endif() set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/vulkan-gpu-info.h) set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/vulkan-gpu-info.cpp) From 0694c241c2b45cd8d47530b41ab0156406fda106 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 22:23:34 +0200 Subject: [PATCH 10/13] build: Vulkan prebuilds --- .github/workflows/build.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3f1a302b..7465398a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -116,6 +116,24 @@ jobs: cuda: '12.2.0' method: 'network' + - name: Install Vulkan SDK on Windows + if: startsWith(matrix.config.os, 'windows') + env: + VULKAN_VERSION: 1.3.261.1 + run: | + curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" + & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install + Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" + Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" + + - name: Install Vulkan SDK on Ubuntu + if: startsWith(matrix.config.name, 'Ubuntu GCC') + run: | + wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc + sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list + sudo apt update + sudo apt install vulkan-sdk + - name: Install dependencies on macOS if: startsWith(matrix.config.os, 'macos') run: | @@ -179,10 +197,12 @@ jobs: if (process.env.ARTIFACT_NAME === "win") { await buildBinary("x64"); await buildBinary("x64", ["--cuda"]); + await buildBinary("x64", ["--vulkan"]); // await buildBinary("arm64", [], windowsOnArmNodeVersion); // disabled arm64 for now as compilation doesn't work } else if (process.env.ARTIFACT_NAME === "linux") { await buildBinary("x64"); await buildBinary("x64", ["--cuda"]); + await buildBinary("x64", ["--vulkan"]); await 
buildBinary("arm64"); await buildBinary("armv7l"); } else if (process.env.ARTIFACT_NAME === "mac") { From a71bdc986b75475ec61e27c183d70923ca35d058 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 22:24:37 +0200 Subject: [PATCH 11/13] build: run CI for PRs from forks --- .github/workflows/build.yml | 9 ++++++--- .github/workflows/prLint.yml | 1 + .github/workflows/test.yml | 9 ++++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7465398a..c5232040 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,7 +1,10 @@ name: Build on: push: - + branches: + - master + - beta + pull_request: workflow_dispatch: jobs: @@ -319,7 +322,7 @@ jobs: release: name: Release - if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta' + if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta') runs-on: ubuntu-latest concurrency: release-${{ github.ref }} environment: @@ -387,7 +390,7 @@ jobs: npm run docs:build - name: Upload docs to GitHub Pages if: steps.set-npm-url.outputs.npm-url != '' && github.ref == 'refs/heads/master' - uses: actions/upload-artifact@v4 + uses: actions/upload-pages-artifact@v3 with: name: pages-docs path: docs-site diff --git a/.github/workflows/prLint.yml b/.github/workflows/prLint.yml index dcd2213e..aedf09d6 100644 --- a/.github/workflows/prLint.yml +++ b/.github/workflows/prLint.yml @@ -3,6 +3,7 @@ on: pull_request: pull_request_target: types: [opened, reopened, edited, synchronize] + jobs: lint: name: Lint diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2ab89558..feeb345b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,5 +1,12 @@ name: Test -on: [push] +on: + push: + branches: + - master + - beta + pull_request: + workflow_dispatch: + jobs: test: name: Test From 4d67064f998a2afed782826b43bafac67bda4b34 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 22:56:46 +0200 Subject: [PATCH 12/13] fix: bug --- llama/gpuInfo/vulkan-gpu-info.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 554bad44..e95b0582 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -29,7 +29,7 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkan std::any_of( extensionProperties.begin(), extensionProperties.end(), - [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME;} + [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName.data()) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME;} ); if (memoryBudgetExtensionSupported) { From e800fc430849433e57ced6e74907e7ef2b0fd266 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 24 Feb 2024 23:07:28 +0200 Subject: [PATCH 13/13] docs: mention that Vulkan support is experimental --- src/bindings/getLlama.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bindings/getLlama.ts b/src/bindings/getLlama.ts index 11685fd5..a14efac3 100644 --- a/src/bindings/getLlama.ts +++ b/src/bindings/getLlama.ts @@ -41,6 +41,7 @@ export type LlamaOptions = { /** * Toggle Vulkan support on llama.cpp. + * Currently, Vulkan support is experimental. Use with caution. * Disabled by default. */ vulkan?: boolean,