
Commit 1b28061

llama : skip token bounds check when evaluating embeddings (#9437)
1 parent 8db003a commit 1b28061

File tree

1 file changed (+18, -14 lines)

src/llama.cpp (+18, -14)
@@ -16076,19 +16076,21 @@ static int llama_decode_internal(
         return -1;
     }
 
-    for (uint32_t i = 0; i < n_tokens_all; ++i) {
-        if (batch_all.token[i] < 0 || (uint32_t)batch_all.token[i] >= lctx.model.vocab.n_vocab) {
-            LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch_all.token[i]);
-            return -1;
-        }
-    }
-
     const auto & model   = lctx.model;
     const auto & hparams = model.hparams;
     const auto & cparams = lctx.cparams;
 
     GGML_ASSERT((!batch_all.token && batch_all.embd) || (batch_all.token && !batch_all.embd)); // NOLINT
 
+    if (batch_all.token) {
+        for (uint32_t i = 0; i < n_tokens_all; ++i) {
+            if (batch_all.token[i] < 0 || (uint32_t)batch_all.token[i] >= model.vocab.n_vocab) {
+                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch_all.token[i]);
+                return -1;
+            }
+        }
+    }
+
     GGML_ASSERT(n_tokens_all <= cparams.n_batch);
 
     GGML_ASSERT((cparams.causal_attn || cparams.n_ubatch >= n_tokens_all) && "non-causal attention requires n_ubatch >= n_tokens");
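Why the check moves: the GGML_ASSERT above guarantees that a batch carries either token ids or embeddings, never both. When the caller evaluates embeddings, batch_all.token is NULL, and the old loop ran before that assert and indexed batch_all.token[i] unconditionally, dereferencing a null pointer. A minimal sketch of the embeddings case through the public API; llama_batch_init, llama_decode, and llama_batch_free are the real entry points, while ctx and n_embd are assumed to be a valid llama_context * and the model's embedding width:

// Build an embeddings batch: with a non-zero second argument,
// llama_batch_init() allocates batch.embd and leaves batch.token NULL,
// which is exactly the case the unguarded loop crashed on.
llama_batch batch = llama_batch_init(/*n_tokens=*/4, /*embd=*/n_embd, /*n_seq_max=*/1);
batch.n_tokens = 4;
// ... fill batch.embd with 4 * n_embd floats, set pos/seq_id/logits as usual ...
llama_decode(ctx, batch); // token bounds check is now skipped: batch.token == NULL
llama_batch_free(batch);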
@@ -16375,19 +16377,21 @@ static int llama_encode_internal(
         return -1;
     }
 
-    for (uint32_t i = 0; i < n_tokens; ++i) {
-        if (batch.token[i] < 0 || (uint32_t)batch.token[i] >= lctx.model.vocab.n_vocab) {
-            LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch.token[i]);
-            return -1;
-        }
-    }
-
     const auto & model   = lctx.model;
     const auto & hparams = model.hparams;
     const auto & cparams = lctx.cparams;
 
     GGML_ASSERT((!batch.token && batch.embd) || (batch.token && !batch.embd)); // NOLINT
 
+    if (batch.token) {
+        for (uint32_t i = 0; i < n_tokens; ++i) {
+            if (batch.token[i] < 0 || (uint32_t)batch.token[i] >= model.vocab.n_vocab) {
+                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch.token[i]);
+                return -1;
+            }
+        }
+    }
+
     // micro-batching is not possible for non-causal encoding, so we process the batch in a single shot
     GGML_ASSERT(cparams.n_ubatch >= n_tokens && "encoder requires n_ubatch >= n_tokens");
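The encoder path gets the identical guard. As a self-contained illustration of the pattern, with the caveat that the helper name tokens_in_vocab is hypothetical and llama.cpp inlines the loop rather than factoring it out:

#include <cstdint>
#include <cstdio>

// Hypothetical standalone version of the committed check: validate token ids
// only when the batch actually carries tokens; an embeddings batch passes
// tokens == nullptr and is accepted without touching the vocab bound.
static bool tokens_in_vocab(const int32_t * tokens, uint32_t n_tokens, uint32_t n_vocab) {
    if (tokens == nullptr) {
        return true; // embeddings batch: nothing to bounds-check
    }
    for (uint32_t i = 0; i < n_tokens; ++i) {
        if (tokens[i] < 0 || (uint32_t) tokens[i] >= n_vocab) {
            fprintf(stderr, "invalid token[%u] = %d\n", i, tokens[i]);
            return false;
        }
    }
    return true;
}

int main() {
    const int32_t good[] = { 0, 5, 31999 };
    const int32_t bad[]  = { 7, -3 };
    printf("tokens ok:     %d\n", tokens_in_vocab(good,    3, 32000)); // 1
    printf("tokens bad:    %d\n", tokens_in_vocab(bad,     2, 32000)); // 0, logs the offender
    printf("embeddings ok: %d\n", tokens_in_vocab(nullptr, 2, 32000)); // 1, check skipped
    return 0;
}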