Skip to content

Commit 7808573

Browse files
authored
Merge branch 'master' into feat-roles
2 parents d7d084c + 566b5cf commit 7808573

File tree

5 files changed

+63
-59
lines changed

5 files changed

+63
-59
lines changed

.github/workflows/release.yaml

+6 -26
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
name: Build and Release
22

3-
on:
3+
on:
44
- push
55
- pull_request
66

@@ -16,15 +16,6 @@ concurrency:
1616

1717
jobs:
1818
build-linux:
19-
strategy:
20-
matrix:
21-
include:
22-
- build: ''
23-
defines: ''
24-
- build: 'cuda12'
25-
defines: ''
26-
- build: 'cuda11'
27-
defines: ''
2819
runs-on: ubuntu-latest
2920
steps:
3021
- name: Clone
@@ -40,17 +31,13 @@ jobs:
4031
sudo apt-get update
4132
sudo apt-get install build-essential ffmpeg protobuf-compiler
4233
- name: Install CUDA Dependencies
43-
if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }}
4434
run: |
45-
if [ "${{ matrix.build }}" == "cuda12" ]; then
46-
export CUDA_VERSION=12-3
47-
else
48-
export CUDA_VERSION=11-7
49-
fi
5035
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
5136
sudo dpkg -i cuda-keyring_1.1-1_all.deb
5237
sudo apt-get update
5338
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
39+
env:
40+
CUDA_VERSION: 12-3
5441
- name: Cache grpc
5542
id: cache-grpc
5643
uses: actions/cache@v4
@@ -69,22 +56,15 @@ jobs:
6956
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
7057
- name: Build
7158
id: build
72-
env:
73-
BUILD_ID: "${{ matrix.build }}"
7459
run: |
7560
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
7661
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
7762
export PATH=$PATH:$GOPATH/bin
78-
if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then
79-
export BUILD_TYPE=cublas
80-
export PATH=/usr/local/cuda/bin:$PATH
81-
make dist
82-
else
83-
STATIC=true make dist
84-
fi
63+
export PATH=/usr/local/cuda/bin:$PATH
64+
make dist
8565
- uses: actions/upload-artifact@v4
8666
with:
87-
name: LocalAI-linux-${{ matrix.build }}
67+
name: LocalAI-linux
8868
path: release/
8969
- name: Release
9070
uses: softprops/action-gh-release@v2

.github/workflows/test.yml

+11 -3
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ jobs:
5757
df -h
5858
- name: Clone
5959
uses: actions/checkout@v4
60-
with:
60+
with:
6161
submodules: true
6262
- name: Setup Go ${{ matrix.go-version }}
6363
uses: actions/setup-go@v5
@@ -87,6 +87,12 @@ jobs:
8787
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
8888
rm protoc.zip
8989
90+
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
91+
sudo dpkg -i cuda-keyring_1.1-1_all.deb
92+
sudo apt-get update
93+
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
94+
export CUDACXX=/usr/local/cuda/bin/nvcc
95+
9096
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
9197
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
9298
@@ -102,6 +108,8 @@ jobs:
102108
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
103109
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncnn)
104110
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
111+
env:
112+
CUDA_VERSION: 12-3
105113
- name: Cache grpc
106114
id: cache-grpc
107115
uses: actions/cache@v4
@@ -166,7 +174,7 @@ jobs:
166174
df -h
167175
- name: Clone
168176
uses: actions/checkout@v4
169-
with:
177+
with:
170178
submodules: true
171179
- name: Build images
172180
run: |
@@ -192,7 +200,7 @@ jobs:
192200
steps:
193201
- name: Clone
194202
uses: actions/checkout@v4
195-
with:
203+
with:
196204
submodules: true
197205
- name: Setup Go ${{ matrix.go-version }}
198206
uses: actions/setup-go@v5

Makefile

+16 -2
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
1616
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
1717

1818
# whisper.cpp version
19-
WHISPER_CPP_VERSION?=4ef8d9f44eb402c528ab6d990ab50a9f4f666347
19+
WHISPER_CPP_VERSION?=9d5771ae43d7fc7cca9d31dd924b13a29144e476
2020

2121
# bert.cpp version
2222
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
@@ -319,7 +319,14 @@ build-minimal:
319319
build-api:
320320
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
321321

322-
dist: build
322+
dist:
323+
STATIC=true $(MAKE) backend-assets/grpc/llama-cpp-avx2
324+
ifeq ($(OS),Darwin)
325+
$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
326+
else
327+
$(MAKE) backend-assets/grpc/llama-cpp-cuda
328+
endif
329+
$(MAKE) build
323330
mkdir -p release
324331
# if BUILD_ID is empty, then we don't append it to the binary name
325332
ifeq ($(BUILD_ID),)
@@ -677,6 +684,13 @@ ifeq ($(BUILD_TYPE),metal)
677684
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
678685
endif
679686

687+
# Build the CUDA variant of the llama.cpp gRPC server.
#
# The sources are built from a private copy of backend/cpp/llama placed at
# backend/cpp/llama-cuda, with AVX enabled, AVX2/AVX512/FMA/F16C disabled and
# CUDA enabled, and the resulting grpc-server binary is installed as this
# rule's target.
#
# The previous copy is removed first: when the destination directory already
# exists, `cp -r SRC DST` copies SRC *into* it (DST/llama) instead of
# refreshing it, so a rebuild would silently compile stale sources.
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
	rm -rf backend/cpp/llama-cuda
	cp -rf backend/cpp/llama backend/cpp/llama-cuda
	$(MAKE) -C backend/cpp/llama-cuda purge
	$(info ${GREEN}I llama-cpp build info:cuda${RESET})
	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
	cp -rfv backend/cpp/llama-cuda/grpc-server $@
693+
680694
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
681695
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
682696
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/

go.sum

-16
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,6 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7O
22
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8=
33
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
44
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
5-
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI=
6-
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0=
75
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
86
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
97
github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
@@ -62,8 +60,6 @@ github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKoh
6260
github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
6361
github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
6462
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
65-
github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df h1:qVcBEZlvp5A1gGWNJj02xyDtbsUI2hohlQMSB1fgER4=
66-
github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
6763
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
6864
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
6965
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
@@ -73,8 +69,6 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo
7369
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
7470
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
7571
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
76-
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e h1:KtbU2JR3lJuXFASHG2+sVLucfMPBjWKUUKByX6C81mQ=
77-
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
7872
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
7973
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
8074
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
@@ -99,10 +93,6 @@ github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9Z
9993
github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk=
10094
github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
10195
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
102-
github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 h1:yXvc7QfGtoZ51tUW/YVjoTwAfh8HG88XU7UOrbNlz5Y=
103-
github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1/go.mod h1:fYjkCDRzC+oRLHSjQoajmYK6AmeJnmEanV27CClAcDc=
104-
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY=
105-
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo=
10696
github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI=
10797
github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI=
10898
github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
@@ -226,18 +216,12 @@ github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/I
226216
github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
227217
github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 h1:rzf0wL0CHVc8CEsgyygG0Mn9CNCCPZqOPaz8RiiHYQk=
228218
github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc=
229-
github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU=
230-
github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
231219
github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI=
232220
github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c/go.mod h1:gY3wyrhkRySJtmtI/JPt4a2mKv48h/M9pEZIW+SjeC0=
233-
github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6OUqsWQam0OrEr05okXsJK/TQur3zoZTHbiZD3Ks=
234-
github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
235221
github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
236222
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
237223
github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo=
238224
github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8=
239-
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 h1:YXMxHwHMB9jCBo2Yu5gz3mTB3T1TnZs/HmPLv15LUSA=
240-
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
241225
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
242226
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
243227
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=

pkg/model/initializers.go

+30 -12
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ import (
1111
"time"
1212

1313
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
14+
"github.com/go-skynet/LocalAI/pkg/xsysinfo"
1415
"github.com/phayes/freeport"
1516
"github.com/rs/zerolog/log"
1617
"golang.org/x/sys/cpu"
@@ -29,10 +30,12 @@ const (
2930
LlamaGGML = "llama-ggml"
3031

3132
LLamaCPP = "llama-cpp"
33+
3234
LLamaCPPCUDA12 = "llama-cpp-cuda12"
3335
LLamaCPPAVX2 = "llama-cpp-avx2"
3436
LLamaCPPAVX = "llama-cpp-avx"
3537
LLamaCPPFallback = "llama-cpp-fallback"
38+
LLamaCPPCUDA = "llama-cpp-cuda"
3639

3740
Gpt4AllLlamaBackend = "gpt4all-llama"
3841
Gpt4AllMptBackend = "gpt4all-mpt"
@@ -72,8 +75,7 @@ ENTRY:
7275
}
7376
}
7477
if !e.IsDir() {
75-
//backends = append(backends, e.Name())
76-
if !strings.Contains(e.Name(), LLamaCPP) {
78+
if !strings.Contains(e.Name(), LLamaCPP) || strings.Contains(e.Name(), LLamaCPPFallback) {
7779
backends[e.Name()] = []string{}
7880
}
7981
}
@@ -104,7 +106,7 @@ ENTRY:
104106
// First has more priority
105107
priorityList := []string{
106108
// First llama.cpp and llama-ggml
107-
LLamaCPP, LlamaGGML, Gpt4All,
109+
LLamaCPP, LlamaGGML, Gpt4All, LLamaCPPFallback,
108110
}
109111

110112
toTheEnd := []string{
@@ -190,17 +192,33 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
190192
} else {
191193
grpcProcess := backendPath(o.assetDir, backend)
192194

195+
foundCUDA := false
193196
// for llama-cpp, check CPU capabilities and load the appropriate variant
194197
if backend == LLamaCPP {
195-
if cpu.X86.HasAVX2 {
196-
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
197-
grpcProcess = backendPath(o.assetDir, LLamaCPPAVX2)
198-
} else if cpu.X86.HasAVX {
199-
log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
200-
grpcProcess = backendPath(o.assetDir, LLamaCPPAVX)
201-
} else {
202-
log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
203-
grpcProcess = backendPath(o.assetDir, LLamaCPPFallback)
198+
gpus, err := xsysinfo.GPUs()
199+
if err == nil {
200+
for _, gpu := range gpus {
201+
if strings.Contains(gpu.String(), "nvidia") {
202+
log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
203+
grpcProcess = backendPath(o.assetDir, LLamaCPPCUDA)
204+
if _, err := os.Stat(grpcProcess); err == nil {
205+
foundCUDA = true
206+
}
207+
}
208+
}
209+
}
210+
211+
if !foundCUDA {
212+
if cpu.X86.HasAVX2 {
213+
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
214+
grpcProcess = backendPath(o.assetDir, LLamaCPPAVX2)
215+
} else if cpu.X86.HasAVX {
216+
log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
217+
grpcProcess = backendPath(o.assetDir, LLamaCPPAVX)
218+
} else {
219+
log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
220+
grpcProcess = backendPath(o.assetDir, LLamaCPPFallback)
221+
}
204222
}
205223
}
206224

0 commit comments

Comments
 (0)