Skip to content

Commit 815b1fb

Browse files
authored
batched-bench : add --output-format jsonl option (#9293)
`--output-format` is modeled after `llama-bench`'s options
1 parent 409dc4f commit 815b1fb

File tree

4 files changed

+40
-7
lines changed

4 files changed

+40
-7
lines changed

common/common.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -1678,6 +1678,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
16781678
else { invalid_param = true; }
16791679
return true;
16801680
}
1681+
if (arg == "--output-format") {
1682+
CHECK_ARG
1683+
std::string value(argv[i]);
1684+
/**/ if (value == "jsonl") { params.batched_bench_output_jsonl = true; }
1685+
else if (value == "md") { params.batched_bench_output_jsonl = false; }
1686+
else { invalid_param = true; }
1687+
return true;
1688+
}
16811689
if (arg == "--no-warmup") {
16821690
params.warmup = false;
16831691
return true;
@@ -2068,6 +2076,9 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
20682076
options.push_back({ "export-lora", " --lora-scaled FNAME S", "path to LoRA adapter with user defined scaling S (can be repeated to use multiple adapters)" });
20692077
options.push_back({ "export-lora", "-o, --output FNAME", "output file (default: '%s')", params.lora_outfile.c_str() });
20702078

2079+
options.push_back({ "batched-bench" });
2080+
options.push_back({ "batched-bench", " --output-format {md,jsonl}", "output format for batched-bench results (default: md)" });
2081+
20712082
printf("usage: %s [options]\n", argv[0]);
20722083

20732084
for (const auto & o : options) {

common/common.h

+3
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,9 @@ struct gpt_params {
275275
bool spm_infill = false; // suffix/prefix/middle pattern for infill
276276

277277
std::string lora_outfile = "ggml-lora-merged-f16.gguf";
278+
279+
// batched-bench params
280+
bool batched_bench_output_jsonl = false;
278281
};
279282

280283
void gpt_params_parse_from_env(gpt_params & params);

examples/batched-bench/README.md

+9
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,12 @@ There are 2 modes of operation:
4949
| 128 | 256 | 8 | 3072 | 0.751 | 1363.92 | 15.110 | 135.54 | 15.861 | 193.69 |
5050
| 128 | 256 | 16 | 6144 | 1.569 | 1304.93 | 18.073 | 226.64 | 19.642 | 312.80 |
5151
| 128 | 256 | 32 | 12288 | 3.409 | 1201.35 | 19.223 | 426.15 | 22.633 | 542.93 |
52+
53+
### JSONL output
54+
55+
Pass `--output-format jsonl` to output JSONL (one JSON object per result row) instead of the Markdown table, like so:
56+
57+
```json lines
58+
{"n_kv_max": 2048, "n_batch": 2048, "n_ubatch": 512, "flash_attn": 0, "is_pp_shared": 0, "n_gpu_layers": 99, "n_threads": 8, "n_threads_batch": 8, "pp": 128, "tg": 128, "pl": 1, "n_kv": 256, "t_pp": 0.233810, "speed_pp": 547.453064, "t_tg": 3.503684, "speed_tg": 36.532974, "t": 3.737494, "speed": 68.495094}
59+
{"n_kv_max": 2048, "n_batch": 2048, "n_ubatch": 512, "flash_attn": 0, "is_pp_shared": 0, "n_gpu_layers": 99, "n_threads": 8, "n_threads_batch": 8, "pp": 128, "tg": 128, "pl": 2, "n_kv": 512, "t_pp": 0.422602, "speed_pp": 605.770935, "t_tg": 11.106112, "speed_tg": 23.050371, "t": 11.528713, "speed": 44.410854}
60+
```

examples/batched-bench/batched-bench.cpp

+17-7
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,13 @@ int main(int argc, char ** argv) {
122122
}
123123
}
124124

125-
LOG_TEE("\n");
126-
LOG_TEE("%s: n_kv_max = %d, n_batch = %d, n_ubatch = %d, flash_attn = %d, is_pp_shared = %d, n_gpu_layers = %d, n_threads = %u, n_threads_batch = %u\n", __func__, n_kv_max, params.n_batch, params.n_ubatch, params.flash_attn, params.is_pp_shared, params.n_gpu_layers, ctx_params.n_threads, ctx_params.n_threads_batch);
127-
LOG_TEE("\n");
128-
129-
LOG_TEE("|%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "B", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s", "T s", "S t/s");
130-
LOG_TEE("|%6s-|-%6s-|-%4s-|-%6s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|\n", "------", "------", "----", "------", "--------", "--------", "--------", "--------", "--------", "--------");
125+
if (!params.batched_bench_output_jsonl) {
126+
LOG_TEE("\n");
127+
LOG_TEE("%s: n_kv_max = %d, n_batch = %d, n_ubatch = %d, flash_attn = %d, is_pp_shared = %d, n_gpu_layers = %d, n_threads = %u, n_threads_batch = %u\n", __func__, n_kv_max, params.n_batch, params.n_ubatch, params.flash_attn, params.is_pp_shared, params.n_gpu_layers, ctx_params.n_threads, ctx_params.n_threads_batch);
128+
LOG_TEE("\n");
129+
LOG_TEE("|%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "B", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s", "T s", "S t/s");
130+
LOG_TEE("|%6s-|-%6s-|-%4s-|-%6s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|\n", "------", "------", "----", "------", "--------", "--------", "--------", "--------", "--------", "--------");
131+
}
131132

132133
for ( int i_pp = 0; i_pp < (int) n_pp.size(); ++i_pp) {
133134
for ( int i_tg = 0; i_tg < (int) n_tg.size(); ++i_tg) {
@@ -195,7 +196,16 @@ int main(int argc, char ** argv) {
195196
const float speed_tg = pl*tg / t_tg;
196197
const float speed = n_kv / t;
197198

198-
LOG_TEE("|%6d | %6d | %4d | %6d | %8.3f | %8.2f | %8.3f | %8.2f | %8.3f | %8.2f |\n", pp, tg, pl, n_kv, t_pp, speed_pp, t_tg, speed_tg, t, speed);
199+
if(params.batched_bench_output_jsonl) {
200+
LOG_TEE(
201+
"{\"n_kv_max\": %d, \"n_batch\": %d, \"n_ubatch\": %d, \"flash_attn\": %d, \"is_pp_shared\": %d, \"n_gpu_layers\": %d, \"n_threads\": %u, \"n_threads_batch\": %u, "
202+
"\"pp\": %d, \"tg\": %d, \"pl\": %d, \"n_kv\": %d, \"t_pp\": %f, \"speed_pp\": %f, \"t_tg\": %f, \"speed_tg\": %f, \"t\": %f, \"speed\": %f}\n",
203+
n_kv_max, params.n_batch, params.n_ubatch, params.flash_attn, params.is_pp_shared, params.n_gpu_layers, ctx_params.n_threads, ctx_params.n_threads_batch,
204+
pp, tg, pl, n_kv, t_pp, speed_pp, t_tg, speed_tg, t, speed
205+
);
206+
} else {
207+
LOG_TEE("|%6d | %6d | %4d | %6d | %8.3f | %8.2f | %8.3f | %8.2f | %8.3f | %8.2f |\n", pp, tg, pl, n_kv, t_pp, speed_pp, t_tg, speed_tg, t, speed);
208+
}
199209
}
200210
}
201211
}

0 commit comments

Comments
 (0)