Skip to content

feat: automatically adapt to current free VRAM state #182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 52 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
621c736
fix: bugs
giladgd Mar 18, 2024
1ce8cf1
docs: add canonical URL link
giladgd Mar 18, 2024
7c333d0
test: switch to new vitest test signature
giladgd Mar 18, 2024
13e1ad6
test: separate gguf tests to model dependent and model independent tests
giladgd Mar 18, 2024
3fc475b
chore: update `.gitignore`
giladgd Mar 18, 2024
45edf6a
feat: add disabled recursive clone feature
giladgd Mar 18, 2024
6f1abcb
chore: update `vitest`
giladgd Mar 18, 2024
3995ae1
refactor: `GgufParser`
giladgd Mar 18, 2024
34a4c50
refactor: rename `stream` to `fileReader`
giladgd Mar 18, 2024
69466ae
refactor: gguf
giladgd Mar 18, 2024
d0bd7cc
feat: `inspect gguf` command
giladgd Mar 19, 2024
ab94b63
fix: rename `build` option to `builds` in the `clear` command
giladgd Mar 19, 2024
63ba9b9
feat: read tensor info from a GGUF file
giladgd Mar 20, 2024
29c629c
style: lint fix
giladgd Mar 20, 2024
24093b1
refactor: use a gguf version specific parser
giladgd Mar 20, 2024
98e911d
refactor: rename `ggufParser` directory to `parser`
giladgd Mar 20, 2024
550188b
refactor: move files
giladgd Mar 20, 2024
642ccc8
refactor: rename `getGgufFileInfo`
giladgd Mar 20, 2024
7bf8a7c
feat: add more options to `inspect gguf` command
giladgd Mar 20, 2024
ddbd29e
feat: calculate model VRAM usage based on header tensor info
giladgd Mar 20, 2024
6b9d2b9
test: skip VRAM tests when running on a machine without a GPU
giladgd Mar 21, 2024
cad3fd2
feat: calculate context VRAM usage based on header tensor info
giladgd Mar 23, 2024
d30f06b
feat: flexible default `gpuLayers` and `contextSize` options that dep…
giladgd Mar 28, 2024
87ff5e8
feat: simplify `chat`, `complete` and `infill` commands, list GPU dev…
giladgd Mar 28, 2024
cc5cb5b
docs: update README.md
giladgd Mar 28, 2024
d50f3a4
fix: CUDA GPU info
giladgd Mar 28, 2024
7342633
fix: CUDA GPU info
giladgd Mar 29, 2024
f6ca540
fix: CUDA GPU info
giladgd Mar 29, 2024
98d0b88
fix: use the CUDA integration instead of the deprecated cuBLAS integr…
giladgd Mar 29, 2024
c9c3583
fix: Vulkan GPU info
giladgd Mar 29, 2024
e9fe208
test: fix snapshots
giladgd Mar 29, 2024
f499451
test: add a sanity test to validate that an input is tokenized proper…
giladgd Mar 29, 2024
2e324b2
feat(`JinjaTemplateChatWrapper`): use `tokenizer.chat_template` from …
giladgd Apr 2, 2024
517d8ee
feat: improve `resolveChatWrapper` resolution algorithm
giladgd Apr 2, 2024
a8c677a
refactor: rename `SpecialToken` to `SpecialTokensText` and separate `…
giladgd Apr 2, 2024
128618b
refactor: rename `BuiltinSpecialToken` to `SpecialToken`
giladgd Apr 2, 2024
d754b4b
feat: improve control of leading space in tokenization, trim leading …
giladgd Apr 2, 2024
1037959
fix: bugs
giladgd Apr 2, 2024
e4ccbff
feat: add `noJinja` and `noTrimWhitespace` flags to the `chat` command
giladgd Apr 2, 2024
a61ba62
test: update `parseModelFileName.test.ts`
giladgd Apr 2, 2024
eecf2c3
test: move files
giladgd Apr 2, 2024
c95f2fe
fix: bugs
giladgd Apr 2, 2024
dc5dfca
test: fix tests
giladgd Apr 2, 2024
547cfa8
fix: macOS build
giladgd Apr 2, 2024
367e043
fix: update `lifecycle-utils` to improve `splitText`'s runtime effici…
giladgd Apr 2, 2024
ccae9fe
test: add sensible timeouts
giladgd Apr 2, 2024
6ec6829
fix: vitest config
giladgd Apr 3, 2024
8f08876
feat: `inspect measure` command
giladgd Apr 3, 2024
ffb0eab
feat: improve VRAM consumption estimations
giladgd Apr 4, 2024
f2e52d3
test: update tests
giladgd Apr 4, 2024
67f89c6
feat: improve VRAM consumption estimations
giladgd Apr 4, 2024
cb2f3c8
feat: reserve memory for a model/context before its creation and rele…
giladgd Apr 4, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ node_modules
/.eslintcache
/.vitepress/.cache
/test/.models
/test/temp
/coverage

/llama/compile_commands.json
Expand All @@ -20,6 +21,8 @@ node_modules
/llama/lastBuild.json
/llama/gitRelease.bundle
/llama/.temp
/llama/.idea
/llama/cmake-build-debug
/llama/localBuilds
/llama/Release
/llama/Debug
Expand Down
31 changes: 28 additions & 3 deletions .vitepress/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,22 @@ export default defineConfig({
pageData.frontmatter.editLink = false;
pageData.frontmatter.lastUpdated = false;
}

let canonicalUrl = hostname + pageData.relativePath;
if (canonicalUrl.endsWith("/index.html"))
canonicalUrl = canonicalUrl.slice(0, -"index.html".length);
if (canonicalUrl.endsWith("/index.md"))
canonicalUrl = canonicalUrl.slice(0, -"index.md".length);
else if (canonicalUrl.endsWith(".html"))
canonicalUrl = canonicalUrl.slice(0, -".html".length);
else if (canonicalUrl.endsWith(".md"))
canonicalUrl = canonicalUrl.slice(0, -".md".length);

pageData.frontmatter.head ??= [];
pageData.frontmatter.head.push([
"link",
{rel: "canonical", href: canonicalUrl}
])
},
themeConfig: {
editLink: {
Expand Down Expand Up @@ -183,7 +199,16 @@ export default defineConfig({
{text: "Download", link: "/download"},
{text: "Complete", link: "/complete"},
{text: "Infill", link: "/infill"},
{text: "Inspect", link: "/inspect"},
{
text: "Inspect",
link: "/inspect",
collapsed: true,
items: [
{text: "GPU", link: "/inspect/gpu"},
{text: "GGUF", link: "/inspect/gguf"},
{text: "Measure", link: "/inspect/measure"},
]
},
{text: "Build", link: "/build"},
{text: "Clear", link: "/clear"}
]
Expand Down Expand Up @@ -302,7 +327,7 @@ function orderClasses(sidebar: typeof typedocSidebar) {
items: []
};
(classes.items as DefaultTheme.SidebarItem[]).push(LlamaTextGroup);
const LlamaTextGroupItemsOrder = ["SpecialToken", "BuiltinSpecialToken"];
const LlamaTextGroupItemsOrder = ["SpecialTokensText", "SpecialToken"];

groupItems(
classes.items,
Expand All @@ -327,7 +352,7 @@ function orderTypes(sidebar: typeof typedocSidebar) {
(item) => (
item.text === "BatchItem" ||
item.text === "CustomBatchingDispatchSchedule" ||
item.text === "CustomBatchingPrioritizeStrategy" ||
item.text === "CustomBatchingPrioritizationStrategy" ||
item.text === "PrioritizedBatchItem"
),
{collapsed: false}
Expand Down
92 changes: 81 additions & 11 deletions .vitepress/utils/getCommandHtmlDoc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,54 @@ import {cliBinName, npxRunPrefix} from "../../src/config.js";
import {buildHtmlTable} from "./buildHtmlTable.js";
import {buildHtmlHeading} from "./buildHtmlHeading.js";

export async function getCommandHtmlDoc(command: CommandModule<any, any>, cliName: string = cliBinName) {
const title = cliName + " " + (command.command ?? "");
export async function getCommandHtmlDoc(command: CommandModule<any, any>, {
cliName = cliBinName,
parentCommand,
subCommandsParentPageLink
}: {
cliName?: string,
parentCommand?: CommandModule<any, any>,
subCommandsParentPageLink?: string
} = {}) {
const currentCommandCliCommand = resolveCommandCliCommand(command);
const resolvedParentCommandCliCommand = resolveCommandCliCommand(parentCommand);
const title = cliName + " " + (resolvedParentCommandCliCommand ?? "<command>").replace("<command>", currentCommandCliCommand ?? "");
const description = command.describe ?? "";
const optionGroups = await getOptionsGroupFromCommand(command);
const {subCommands, optionGroups} = await parseCommandDefinition(command);

let res = "";

if (subCommands.length > 0) {
res += buildHtmlHeading("h2", htmlEscape("Commands"), "commands");

res += buildHtmlTable(
[
"Command",
"Description"
].map(htmlEscape),
subCommands
.map((subCommand) => {
if (subCommand.command == null || subCommand.describe === false)
return null;

const resolvedCommandCliCommand = resolveCommandCliCommand(subCommand) ?? "";
const commandPageLink = resolveCommandPageLink(subCommand);

let cliCommand = resolvedCommandCliCommand;
cliCommand = (currentCommandCliCommand ?? "<command>").replace("<command>", cliCommand);

if (parentCommand != null)
cliCommand = (resolvedParentCommandCliCommand ?? "<command>").replace("<command>", cliCommand);

return [
`<a href="${subCommandsParentPageLink != null ? (subCommandsParentPageLink + "/") : ""}${commandPageLink}"><code>` + htmlEscape(cliName + " " + cliCommand) + "</code></a>",
htmlEscape(String(subCommand.describe ?? ""))
];
})
.filter((row): row is string[] => row != null)
);
}

if (optionGroups.length !== 0) {
res += buildHtmlHeading("h2", htmlEscape("Options"), "options");

Expand All @@ -37,7 +78,10 @@ export async function getCommandHtmlDoc(command: CommandModule<any, any>, cliNam
}


async function getOptionsGroupFromCommand(command: CommandModule<any, any>): Promise<OptionsGroup[]> {
async function parseCommandDefinition(command: CommandModule<any, any>): Promise<{
subCommands: CommandModule<any, any>[],
optionGroups: OptionsGroup[]
}> {
const yargsStub = getYargsStub();
function getYargsStub() {
function option(name: string, option: Options) {
Expand All @@ -57,10 +101,16 @@ async function getOptionsGroupFromCommand(command: CommandModule<any, any>): Pro
return yargsStub;
}

return {option};
function command(subCommand: CommandModule<any, any>) {
subCommands.push(subCommand);
return yargsStub;
}

return {option, command};
}

const options: Record<string, {name: string, option: Options}[]> = {};
const subCommands: CommandModule<any, any>[] = [];
const groups: string[] = [];

if (command.builder instanceof Function)
Expand Down Expand Up @@ -97,10 +147,13 @@ async function getOptionsGroupFromCommand(command: CommandModule<any, any>): Pro
return 0;
});

return groups.map((group) => ({
name: normalizeGroupName(group),
options: options[group]!
}));
return {
subCommands,
optionGroups: groups.map((group) => ({
name: normalizeGroupName(group),
options: options[group]!
}))
};
}

function normalizeGroupName(groupName: string): string {
Expand Down Expand Up @@ -156,8 +209,12 @@ function renderOptionsGroupOptionsTable(options: {name: string, option: Options}

let optionDescription: string[] = option.description != null ? [htmlEscape(option.description)] : [];

if (option.default != null) {
optionDescription.push(`(${htmlEscape("default: ")}<code>${htmlEscape(option.default)}</code>)`);
const hasDefaultDescription = option.defaultDescription != null && option.defaultDescription.trim().length > 0;
if (option.default != null || hasDefaultDescription) {
if (hasDefaultDescription && option.defaultDescription != null)
optionDescription.push(`<span style="opacity: 0.72">(${htmlEscape("default: ")}${htmlEscape(option.defaultDescription.trim())})</span>`);
else
optionDescription.push(`<span style="opacity: 0.72">(${htmlEscape("default: ")}<code>${htmlEscape(option.default)}</code>)</span>`);
}

if (option.type != null) {
Expand All @@ -184,6 +241,19 @@ function renderOptionsGroupOptionsTable(options: {name: string, option: Options}
return buildHtmlTable(tableHeaders, tableRows);
}

/**
 * Resolve the CLI command string of a command module.
 *
 * @param command - the command module to read the `command` property from (optional)
 * @returns `undefined` when no command module is given; the first entry of `command.command`
 * when it is an array; otherwise `command.command` as is
 */
function resolveCommandCliCommand(command?: CommandModule<any, any>) {
    if (command == null)
        return undefined;

    const cliCommand = command.command;

    // an array form holds several command strings - only the first one is used
    if (cliCommand instanceof Array)
        return cliCommand[0];

    return cliCommand;
}

/**
 * Resolve the page link segment of a command.
 *
 * @param command - the command module to resolve the link segment for
 * @returns the text before the first space of the command's CLI command string
 * (e.g. `"foo"` for `"foo <arg>"`), or `undefined` when the command has no CLI command string
 */
function resolveCommandPageLink(command: CommandModule<any, any>) {
    const cliCommand = resolveCommandCliCommand(command);
    if (cliCommand == null)
        return undefined;

    // `split` always yields at least one element, so the first word is always present
    const [firstWord] = cliCommand.split(" ");
    return firstWord;
}

type OptionsGroup = {
name: string,
options: Array<{
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
## Features
* Run a text generation model locally on your machine
* Metal, CUDA and Vulkan support
* Pre-built binaries are provided, with a fallback to building from source without `node-gyp` or Python
* Pre-built binaries are provided, with a fallback to building from source _**without**_ `node-gyp` or Python
* Chat with a model using a chat wrapper
* Use the CLI to chat with a model without writing any code
* Up-to-date with the latest version of `llama.cpp`. Download and compile the latest release with a single CLI command.
Expand Down
20 changes: 18 additions & 2 deletions docs/guide/cli/cli.data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@ import {BuildCommand} from "../../../src/cli/commands/BuildCommand.js";
import {ChatCommand} from "../../../src/cli/commands/ChatCommand.js";
import {CompleteCommand} from "../../../src/cli/commands/CompleteCommand.js";
import {InfillCommand} from "../../../src/cli/commands/InfillCommand.js";
import {InspectCommand} from "../../../src/cli/commands/InspectCommand.js";
import {InspectCommand} from "../../../src/cli/commands/inspect/InspectCommand.js";
import {InspectGpuCommand} from "../../../src/cli/commands/inspect/commands/InspectGpuCommand.js";
import {InspectGgufCommand} from "../../../src/cli/commands/inspect/commands/InspectGgufCommand.js";
import {DownloadCommand} from "../../../src/cli/commands/DownloadCommand.js";
import {ClearCommand} from "../../../src/cli/commands/ClearCommand.js";
import {htmlEscape} from "../../../.vitepress/utils/htmlEscape.js";
import {cliBinName, npxRunPrefix} from "../../../src/config.js";
import {buildHtmlHeading} from "../../../.vitepress/utils/buildHtmlHeading.js";
import {buildHtmlTable} from "../../../.vitepress/utils/buildHtmlTable.js";
import {setIsInDocumentationMode} from "../../../src/state.js";
import {InspectMeasureCommand} from "../../../src/cli/commands/inspect/commands/InspectMeasureCommand.js";

export default {
async load() {
Expand All @@ -31,7 +34,20 @@ export default {
chat: await getCommandHtmlDoc(ChatCommand),
complete: await getCommandHtmlDoc(CompleteCommand),
infill: await getCommandHtmlDoc(InfillCommand),
inspect: await getCommandHtmlDoc(InspectCommand),
inspect: {
index: await getCommandHtmlDoc(InspectCommand, {
subCommandsParentPageLink: "inspect"
}),
gpu: await getCommandHtmlDoc(InspectGpuCommand, {
parentCommand: InspectCommand
}),
gguf: await getCommandHtmlDoc(InspectGgufCommand, {
parentCommand: InspectCommand
}),
measure: await getCommandHtmlDoc(InspectMeasureCommand, {
parentCommand: InspectCommand
})
},
download: await getCommandHtmlDoc(DownloadCommand),
build: await getCommandHtmlDoc(BuildCommand),
clear: await getCommandHtmlDoc(ClearCommand)
Expand Down
2 changes: 1 addition & 1 deletion docs/guide/cli/inspect.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ outline: deep

<script setup lang="ts">
import {data as docs} from "./cli.data.js";
const commandDoc = docs.inspect;
const commandDoc = docs.inspect.index;
</script>

{{commandDoc.description}}
Expand Down
17 changes: 17 additions & 0 deletions docs/guide/cli/inspect/gguf.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
outline: deep
---
# `inspect gguf` command

<script setup lang="ts">
import {data as docs} from "../cli.data.js";
const commandDoc = docs.inspect.gguf;
</script>

{{commandDoc.description}}

## Usage
```shell-vue
{{commandDoc.usage}}
```
<div v-html="commandDoc.options"></div>
17 changes: 17 additions & 0 deletions docs/guide/cli/inspect/gpu.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
outline: deep
---
# `inspect gpu` command

<script setup lang="ts">
import {data as docs} from "../cli.data.js";
const commandDoc = docs.inspect.gpu;
</script>

{{commandDoc.description}}

## Usage
```shell-vue
{{commandDoc.usage}}
```
<div v-html="commandDoc.options"></div>
17 changes: 17 additions & 0 deletions docs/guide/cli/inspect/measure.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
outline: deep
---
# `inspect measure` command

<script setup lang="ts">
import {data as docs} from "../cli.data.js";
const commandDoc = docs.inspect.measure;
</script>

{{commandDoc.description}}

## Usage
```shell-vue
{{commandDoc.usage}}
```
<div v-html="commandDoc.options"></div>
10 changes: 5 additions & 5 deletions llama/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,19 @@ include_directories("gpuInfo")
include_directories("llama.cpp")
include_directories("./llama.cpp/common")

if (LLAMA_CUBLAS)
if (LLAMA_CUDA)
cmake_minimum_required(VERSION 3.17)

find_package(CUDAToolkit)
if (CUDAToolkit_FOUND)
message(STATUS "Using cuBLAS for GPU info")
message(STATUS "Using CUDA for GPU info")

enable_language(CUDA)

set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/cuda-gpu-info.h)
set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/cuda-gpu-info.cu)

add_compile_definitions(GPU_INFO_USE_CUBLAS)
add_compile_definitions(GPU_INFO_USE_CUDA)

if (LLAMA_STATIC)
set(LLAMA_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart_static)
Expand All @@ -60,7 +60,7 @@ if (LLAMA_CUBLAS)
endif()
endif()
else()
message(FATAL_ERROR "cuBLAS was not found")
message(FATAL_ERROR "CUDA was not found")
endif()
endif()

Expand Down Expand Up @@ -100,7 +100,7 @@ if (LLAMA_HIPBLAS)

if (${hipblas_FOUND} AND ${hip_FOUND})
message(STATUS "Using HIP and hipBLAS for GPU info")
add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUBLAS)
add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUDA)
add_library(gpu-info-rocm OBJECT gpuInfo/cuda-gpu-info.cu gpuInfo/cuda-gpu-info.h)
set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX)
target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
Expand Down
Loading
Loading