
Commit 091adba

okuvshynov authored and mglambda committed

llama.cpp: fix warning message (ggml-org#11839)
There was a typo-like error that caused the warning to print the same number twice when a request arrives with n_predict greater than the server-side limit.

Before the fix:

```
slot launch_slot_: id 0 | task 0 | n_predict = 4096 exceeds server configuration, setting to 4096
```

After the fix:

```
slot launch_slot_: id 0 | task 0 | n_predict = 8192 exceeds server configuration, setting to 4096
```
1 parent 8756813 commit 091adba

File tree

1 file changed: +1 −1 lines changed

examples/server/server.cpp (+1 −1)

```diff
@@ -2073,8 +2073,8 @@ struct server_context {
 
         if (slot.n_predict > 0 && slot.params.n_predict > slot.n_predict) {
             // Might be better to reject the request with a 400 ?
+            SLT_WRN(slot, "n_predict = %d exceeds server configuration, setting to %d", slot.params.n_predict, slot.n_predict);
             slot.params.n_predict = slot.n_predict;
-            SLT_WRN(slot, "n_predict = %d exceeds server configuration, setting to %d", slot.n_predict, slot.n_predict);
         }
 
         if (slot.params.ignore_eos && has_eos_token) {
```
