@@ -1760,7 +1760,6 @@ def submit_completed_generation(url, jobid, sessionstart, submit_dict):
1760
1760
global exitcounter , punishcounter , session_kudos_earned , session_jobs , rewardcounter
1761
1761
reply = make_url_request (url , submit_dict )
1762
1762
if not reply :
1763
- exitcounter += 1
1764
1763
punishcounter += 1
1765
1764
print_with_time (f"Error, Job submit failed." )
1766
1765
else :
@@ -1778,7 +1777,7 @@ def submit_completed_generation(url, jobid, sessionstart, submit_dict):
1778
1777
rewardcounter += 1
1779
1778
if rewardcounter > 50 :
1780
1779
rewardcounter = 0
1781
- if exitcounter > 5 :
1780
+ if exitcounter >= 1 :
1782
1781
exitcounter -= 1
1783
1782
1784
1783
def make_url_request (url , data , method = 'POST' ):
@@ -1815,23 +1814,27 @@ def make_url_request(url, data, method='POST'):
1815
1814
print (f"===\n Embedded Horde Worker '{ worker_name } ' Starting...\n (To use your own KAI Bridge/Scribe worker instead, don't set your API key)" )
1816
1815
BRIDGE_AGENT = f"KoboldCppEmbedWorker:2:https://github.com/LostRuins/koboldcpp"
1817
1816
cluster = "https://horde.koboldai.net"
1818
- while exitcounter < 35 :
1817
+ while exitcounter < 10 :
1819
1818
time .sleep (3 )
1820
1819
readygo = make_url_request (f'{ epurl } /api/v1/info/version' , None ,'GET' )
1821
1820
if readygo :
1822
1821
print_with_time (f"Embedded Horde Worker '{ worker_name } ' is started." )
1823
1822
break
1824
1823
1825
- while exitcounter < 40 :
1824
+ while exitcounter < 10 :
1826
1825
currentjob_attempts = 0
1827
1826
current_generation = None
1828
1827
1829
- if punishcounter >= 8 :
1828
+ if punishcounter >= 5 :
1830
1829
punishcounter = 0
1831
- penaltymult = (1 + (exitcounter // 10 ))
1832
- print_with_time (f"Horde Worker Paused for { penaltymult * 10 } min - Too many errors. It will resume automatically, but you should restart it." )
1833
- print_with_time (f"Caution: Too many failed jobs may lead to entering maintenance mode." )
1834
- time .sleep (600 * penaltymult )
1830
+ exitcounter += 1
1831
+ if exitcounter < 10 :
1832
+ penaltytime = (2 ** exitcounter )
1833
+ print_with_time (f"Horde Worker Paused for { penaltytime } min - Too many errors. It will resume automatically, but you should restart it." )
1834
+ print_with_time (f"Caution: Too many failed jobs may lead to entering maintenance mode." )
1835
+ time .sleep (60 * penaltytime )
1836
+ else :
1837
+ print_with_time (f"Exit limit reached, too many errors." )
1835
1838
1836
1839
#first, make sure we are not generating
1837
1840
if modelbusy .locked ():
@@ -1850,7 +1853,6 @@ def make_url_request(url, data, method='POST'):
1850
1853
}
1851
1854
pop = make_url_request (f'{ cluster } /api/v2/generate/text/pop' ,gen_dict )
1852
1855
if not pop :
1853
- exitcounter += 1
1854
1856
punishcounter += 1
1855
1857
print_with_time (f"Failed to fetch job from { cluster } . Waiting 10 seconds..." )
1856
1858
time .sleep (10 )
@@ -1870,7 +1872,7 @@ def make_url_request(url, data, method='POST'):
1870
1872
print_with_time (f"Job received from { cluster } for { current_payload .get ('max_length' ,80 )} tokens and { current_payload .get ('max_context_length' ,1024 )} max context. Starting generation..." )
1871
1873
1872
1874
#do gen
1873
- while exitcounter < 35 :
1875
+ while exitcounter < 10 :
1874
1876
if not modelbusy .locked ():
1875
1877
current_generation = make_url_request (f'{ epurl } /api/v1/generate' , current_payload )
1876
1878
if current_generation :
0 commit comments