@@ -3,7 +3,7 @@
 #include "build-info.h"
 #include "grammar-parser.h"

-// #define SERVER_MULTIMODAL_SUPPORT
+#define SERVER_MULTIMODAL_SUPPORT

 #ifdef SERVER_MULTIMODAL_SUPPORT
 #include "../llava/clip.h"
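
Enabling the flag compiles in the llava image path. A minimal sketch of the gating pattern this sets up, assuming a request handler and a process_images() helper that do not appear in this excerpt:

    #define SERVER_MULTIMODAL_SUPPORT

    #ifdef SERVER_MULTIMODAL_SUPPORT
    #include "../llava/clip.h"   // CLIP image encoder from the llava example
    #endif

    void handle_request(/* slot state */) {
    #ifdef SERVER_MULTIMODAL_SUPPORT
        // With the flag defined, request images are decoded and embedded
        // via CLIP before the text prompt is tokenized.
        // process_images(slot);   // hypothetical helper, not in the diff
    #endif
        // Text-only prompt processing runs either way.
    }
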
@@ -78,7 +78,7 @@ std::vector<uint8_t> base64_decode(std::string const& encoded_string) {
     int i = 0;
     int j = 0;
     int in_ = 0;
-    BYTE char_array_4[4], char_array_3[3];
+    unsigned char char_array_4[4], char_array_3[3];
     std::vector<uint8_t> ret;
     while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
         char_array_4[i++] = encoded_string[in_]; in_++;
@@ -884,9 +884,10 @@ struct llama_server_context

             // append prefix of next image
             batch.n_tokens = 0;
-            std::vector<llama_token> append_tokens = tokenize(
-                image_idx >= slot.images.size() ? slot.params.input_suffix : // no more images, then process suffix prompt
-                slot.images[image_idx].prefix_prompt, true); // has next image
+            const auto json_prompt = (image_idx >= slot.images.size()) ?
+                slot.params.input_suffix : // no more images, then process suffix prompt
+                (json)(slot.images[image_idx].prefix_prompt);
+            std::vector<llama_token> append_tokens = tokenize(json_prompt, true); // has next image
             for (int i = 0; i < append_tokens.size(); ++i) {
                 batch.token [batch.n_tokens] = append_tokens[i];
                 batch.pos   [batch.n_tokens] = slot.n_past;
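
This rewrite reads as a type fix: judging by the cast, input_suffix is already a json while prefix_prompt is a plain std::string, and nlohmann::json converts implicitly in both directions, which can make a ternary over the two ambiguous. Hoisting the operand into an explicitly typed json_prompt performs the conversion once. A stand-alone illustration, assuming tokenize takes an nlohmann::json prompt as elsewhere in this server; the stub and names below are hypothetical:

    #include <string>
    #include <vector>
    #include <nlohmann/json.hpp>

    using json = nlohmann::json;

    // Hypothetical stand-in for the server's tokenizer: accepts a json
    // prompt holding a bare string (the real one also handles arrays).
    static std::vector<int> tokenize_sketch(const json & prompt, bool add_bos) {
        std::vector<int> out;
        if (add_bos) out.push_back(1); // pretend BOS token id
        for (char c : prompt.get<std::string>()) out.push_back((int) c);
        return out;
    }

    void append_prefix(const json & input_suffix, const std::string & prefix_prompt,
                       bool images_done) {
        // `images_done ? input_suffix : prefix_prompt` mixes json and
        // std::string; casting one arm gives both the same type.
        const json json_prompt = images_done ? input_suffix : (json) prefix_prompt;
        std::vector<int> append_tokens = tokenize_sketch(json_prompt, true);
        (void) append_tokens;
    }
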