Skip to content

Commit b6f952f

Browse files
committed
improved exit logic
1 parent a5a5839 commit b6f952f

File tree

1 file changed

+13
-11
lines changed

1 file changed

+13
-11
lines changed

koboldcpp.py

+13 -11
Original file line number | Diff line number | Diff line change
@@ -1760,7 +1760,6 @@ def submit_completed_generation(url, jobid, sessionstart, submit_dict):
17601760
global exitcounter, punishcounter, session_kudos_earned, session_jobs, rewardcounter
17611761
reply = make_url_request(url, submit_dict)
17621762
if not reply:
1763-
exitcounter += 1
17641763
punishcounter += 1
17651764
print_with_time(f"Error, Job submit failed.")
17661765
else:
@@ -1778,7 +1777,7 @@ def submit_completed_generation(url, jobid, sessionstart, submit_dict):
17781777
rewardcounter += 1
17791778
if rewardcounter > 50:
17801779
rewardcounter = 0
1781-
if exitcounter > 5:
1780+
if exitcounter >= 1:
17821781
exitcounter -= 1
17831782

17841783
def make_url_request(url, data, method='POST'):
@@ -1815,23 +1814,27 @@ def make_url_request(url, data, method='POST'):
18151814
print(f"===\nEmbedded Horde Worker '{worker_name}' Starting...\n(To use your own KAI Bridge/Scribe worker instead, don't set your API key)")
18161815
BRIDGE_AGENT = f"KoboldCppEmbedWorker:2:https://github.com/LostRuins/koboldcpp"
18171816
cluster = "https://horde.koboldai.net"
1818-
while exitcounter < 35:
1817+
while exitcounter < 10:
18191818
time.sleep(3)
18201819
readygo = make_url_request(f'{epurl}/api/v1/info/version', None,'GET')
18211820
if readygo:
18221821
print_with_time(f"Embedded Horde Worker '{worker_name}' is started.")
18231822
break
18241823

1825-
while exitcounter < 40:
1824+
while exitcounter < 10:
18261825
currentjob_attempts = 0
18271826
current_generation = None
18281827

1829-
if punishcounter >= 8:
1828+
if punishcounter >= 5:
18301829
punishcounter = 0
1831-
penaltymult = (1 + (exitcounter//10))
1832-
print_with_time(f"Horde Worker Paused for {penaltymult*10} min - Too many errors. It will resume automatically, but you should restart it.")
1833-
print_with_time(f"Caution: Too many failed jobs may lead to entering maintenance mode.")
1834-
time.sleep(600 * penaltymult)
1830+
exitcounter += 1
1831+
if exitcounter < 10:
1832+
penaltytime = (2 ** exitcounter)
1833+
print_with_time(f"Horde Worker Paused for {penaltytime} min - Too many errors. It will resume automatically, but you should restart it.")
1834+
print_with_time(f"Caution: Too many failed jobs may lead to entering maintenance mode.")
1835+
time.sleep(60 * penaltytime)
1836+
else:
1837+
print_with_time(f"Exit limit reached, too many errors.")
18351838

18361839
#first, make sure we are not generating
18371840
if modelbusy.locked():
@@ -1850,7 +1853,6 @@ def make_url_request(url, data, method='POST'):
18501853
}
18511854
pop = make_url_request(f'{cluster}/api/v2/generate/text/pop',gen_dict)
18521855
if not pop:
1853-
exitcounter += 1
18541856
punishcounter += 1
18551857
print_with_time(f"Failed to fetch job from {cluster}. Waiting 10 seconds...")
18561858
time.sleep(10)
@@ -1870,7 +1872,7 @@ def make_url_request(url, data, method='POST'):
18701872
print_with_time(f"Job received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")
18711873

18721874
#do gen
1873-
while exitcounter < 35:
1875+
while exitcounter < 10:
18741876
if not modelbusy.locked():
18751877
current_generation = make_url_request(f'{epurl}/api/v1/generate', current_payload)
18761878
if current_generation:

0 commit comments

Comments
 (0)