Commit 6ffe236
Fix LR scheduler issue with CPU offload optimizer (#1649)
* synchronize param H2D
* let CPU offload inherit Optimizer
* add scheduler to test
1 parent 122eb73 commit 6ffe236
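
In practice, the fix lets CPUOffloadOptimizer be driven by PyTorch's built-in LR schedulers. A minimal usage sketch follows; the import path and constructor arguments mirror the test diff below, while the model, learning rate, and training loop are illustrative assumptions rather than part of this commit (CPU offload generally expects parameters to live on a CUDA device).

    import torch
    from torchao.prototype.low_bit_optim import CPUOffloadOptimizer  # assumed import path

    model = torch.nn.Linear(32, 32).cuda()      # hypothetical model; params live on GPU
    optim = CPUOffloadOptimizer(
        model.parameters(),
        torch.optim.AdamW,                      # base optimizer whose states are offloaded to CPU
        offload_gradients=False,
        lr=1e-3,                                # extra kwargs are assumed to be forwarded to AdamW
    )
    # Before this commit, LRScheduler rejected CPUOffloadOptimizer because it was
    # not an instance of torch.optim.Optimizer; with the fix this just works.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, T_max=100)

    for _ in range(100):
        model(torch.randn(4, 32, device="cuda")).sum().backward()
        optim.step()
        optim.zero_grad()
        scheduler.step()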

2 files changed: +10 -1 lines changed


test/prototype/test_low_bit_optim.py (+5)
@@ -287,6 +287,9 @@ def test_optim_cpu_offload_correctness(self, offload_grad, grad_accum):
             offload_gradients=offload_grad,
         )
 
+        scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR(optim1, 100)
+        scheduler2 = torch.optim.lr_scheduler.CosineAnnealingLR(optim2, 100)
+
         rng = torch.Generator(device=device)
         rng.manual_seed(42)
 
@@ -299,6 +302,7 @@ def test_optim_cpu_offload_correctness(self, offload_grad, grad_accum):
 
             optim1.step()
             optim1.zero_grad()
+            scheduler1.step()
 
         # reset the rng
         rng.manual_seed(42)
@@ -309,6 +313,7 @@ def test_optim_cpu_offload_correctness(self, offload_grad, grad_accum):
 
             optim2.step()
             optim2.zero_grad()
+            scheduler2.step()
 
         for p1, p2 in zip(model1.parameters(), model2.parameters()):
             torch.testing.assert_close(p2, p1)

torchao/prototype/low_bit_optim/cpu_offload.py (+5, -1)
@@ -6,7 +6,11 @@
 from torchao.utils import TORCH_VERSION_AT_LEAST_2_4, get_available_devices
 
 
-class CPUOffloadOptimizer:
+# NOTE: We make this inherit Optimizer so it works with PyTorch's built-in LR
+# schedulers (those schedulers specifically check for instances of Optimizer).
+# However, it won't behave exactly like Optimizer, e.g. we don't call
+# Optimizer.__init__(), there is no self.defaults.
+class CPUOffloadOptimizer(Optimizer):
     def __init__(
         self,
         params: ParamsT,
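To illustrate the pattern the NOTE above describes, here is a minimal, self-contained sketch (not torchao code, and every name in it is hypothetical): a wrapper that subclasses Optimizer solely so the isinstance check inside PyTorch's LR schedulers passes, while skipping Optimizer.__init__() and delegating the real work to an inner optimizer.

    import torch
    from torch.optim import Optimizer

    class OffloadStyleWrapper(Optimizer):   # hypothetical name, mirrors the approach above
        def __init__(self, params, base_cls=torch.optim.SGD, **kwargs):
            # Deliberately do NOT call Optimizer.__init__(): there is no self.defaults,
            # so this is not a full-fledged Optimizer. LR schedulers only need the
            # isinstance check to pass plus a param_groups list to read/write "lr" in.
            self.base = base_cls(list(params), **kwargs)
            self.param_groups = self.base.param_groups

        def step(self, closure=None):
            return self.base.step(closure)

        def zero_grad(self, set_to_none=True):
            self.base.zero_grad(set_to_none=set_to_none)

    model = torch.nn.Linear(8, 8)
    opt = OffloadStyleWrapper(model.parameters(), lr=0.1)
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=10)  # accepted: isinstance check passes
    for _ in range(3):
        model(torch.randn(2, 8)).sum().backward()
        opt.step()
        opt.zero_grad()
        sched.step()
    print(opt.param_groups[0]["lr"])  # the scheduler has annealed the learning rate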