@@ -122,12 +122,13 @@ int main(int argc, char ** argv) {
122
122
}
123
123
}
124
124
125
- LOG_TEE (" \n " );
126
- LOG_TEE (" %s: n_kv_max = %d, n_batch = %d, n_ubatch = %d, flash_attn = %d, is_pp_shared = %d, n_gpu_layers = %d, n_threads = %u, n_threads_batch = %u\n " , __func__, n_kv_max, params.n_batch , params.n_ubatch , params.flash_attn , params.is_pp_shared , params.n_gpu_layers , ctx_params.n_threads , ctx_params.n_threads_batch );
127
- LOG_TEE (" \n " );
128
-
129
- LOG_TEE (" |%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n " , " PP" , " TG" , " B" , " N_KV" , " T_PP s" , " S_PP t/s" , " T_TG s" , " S_TG t/s" , " T s" , " S t/s" );
130
- LOG_TEE (" |%6s-|-%6s-|-%4s-|-%6s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|\n " , " ------" , " ------" , " ----" , " ------" , " --------" , " --------" , " --------" , " --------" , " --------" , " --------" );
125
+ if (!params.batched_bench_output_jsonl ) {
126
+ LOG_TEE (" \n " );
127
+ LOG_TEE (" %s: n_kv_max = %d, n_batch = %d, n_ubatch = %d, flash_attn = %d, is_pp_shared = %d, n_gpu_layers = %d, n_threads = %u, n_threads_batch = %u\n " , __func__, n_kv_max, params.n_batch , params.n_ubatch , params.flash_attn , params.is_pp_shared , params.n_gpu_layers , ctx_params.n_threads , ctx_params.n_threads_batch );
128
+ LOG_TEE (" \n " );
129
+ LOG_TEE (" |%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n " , " PP" , " TG" , " B" , " N_KV" , " T_PP s" , " S_PP t/s" , " T_TG s" , " S_TG t/s" , " T s" , " S t/s" );
130
+ LOG_TEE (" |%6s-|-%6s-|-%4s-|-%6s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|\n " , " ------" , " ------" , " ----" , " ------" , " --------" , " --------" , " --------" , " --------" , " --------" , " --------" );
131
+ }
131
132
132
133
for ( int i_pp = 0 ; i_pp < (int ) n_pp.size (); ++i_pp) {
133
134
for ( int i_tg = 0 ; i_tg < (int ) n_tg.size (); ++i_tg) {
@@ -195,7 +196,16 @@ int main(int argc, char ** argv) {
195
196
const float speed_tg = pl*tg / t_tg;
196
197
const float speed = n_kv / t;
197
198
198
- LOG_TEE (" |%6d | %6d | %4d | %6d | %8.3f | %8.2f | %8.3f | %8.2f | %8.3f | %8.2f |\n " , pp, tg, pl, n_kv, t_pp, speed_pp, t_tg, speed_tg, t, speed);
199
+ if (params.batched_bench_output_jsonl ) {
200
+ LOG_TEE (
201
+ " {\" n_kv_max\" : %d, \" n_batch\" : %d, \" n_ubatch\" : %d, \" flash_attn\" : %d, \" is_pp_shared\" : %d, \" n_gpu_layers\" : %d, \" n_threads\" : %u, \" n_threads_batch\" : %u, "
202
+ " \" pp\" : %d, \" tg\" : %d, \" pl\" : %d, \" n_kv\" : %d, \" t_pp\" : %f, \" speed_pp\" : %f, \" t_tg\" : %f, \" speed_tg\" : %f, \" t\" : %f, \" speed\" : %f}\n " ,
203
+ n_kv_max, params.n_batch , params.n_ubatch , params.flash_attn , params.is_pp_shared , params.n_gpu_layers , ctx_params.n_threads , ctx_params.n_threads_batch ,
204
+ pp, tg, pl, n_kv, t_pp, speed_pp, t_tg, speed_tg, t, speed
205
+ );
206
+ } else {
207
+ LOG_TEE (" |%6d | %6d | %4d | %6d | %8.3f | %8.2f | %8.3f | %8.2f | %8.3f | %8.2f |\n " , pp, tg, pl, n_kv, t_pp, speed_pp, t_tg, speed_tg, t, speed);
208
+ }
199
209
}
200
210
}
201
211
}
0 commit comments