Commit 268ebdf

bglick13 and Nathan Lambert authored
Pipeline cleanup (#947)

* valuefunction code
* start example scripts
* missing imports
* bug fixes and placeholder example script
* add value function scheduler
* load value function from hub and get best actions in example
* very close to working example
* larger batch size for planning
* more tests
* merge unet1d changes
* wandb for debugging, use newer models
* success!
* turns out we just need more diffusion steps
* run on modal
* merge and code cleanup
* use same api for rl model
* fix variance type
* wrong normalization function
* add tests
* style
* style and quality
* edits based on comments
* style and quality
* remove unused var
* hack unet1d into a value function
* add pipeline
* fix arg order
* add pipeline to core library
* community pipeline
* fix couple shape bugs
* style
* Apply suggestions from code review
* clean up comments
* convert older script to using pipeline and add readme
* rename scripts
* style, update tests
* delete unet rl model file
* remove imports in src

Co-authored-by: Nathan Lambert <nathan@huggingface.co>
1 parent 713e8f2 commit 268ebdf

File tree

6 files changed: +291 -6 lines changed

examples/community/pipeline.py (+18 -3)

```diff
@@ -1,3 +1,4 @@
+import numpy as np
 import torch
 
 import tqdm
@@ -59,7 +60,6 @@ def run_diffusion(self, x, conditions, n_guide_steps, scale):
         for i in tqdm.tqdm(self.scheduler.timesteps):
             # create batch of timesteps to pass into model
             timesteps = torch.full((batch_size,), i, device=self.unet.device, dtype=torch.long)
-            # 3. call the sample function
             for _ in range(n_guide_steps):
                 with torch.enable_grad():
                     x.requires_grad_()
@@ -76,24 +76,39 @@ def run_diffusion(self, x, conditions, n_guide_steps, scale):
             prev_x = self.unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1)
             x = self.scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"]
 
-            # 4. apply conditions to the trajectory
+            # apply conditions to the trajectory
             x = self.reset_x0(x, conditions, self.action_dim)
             x = self.to_torch(x)
         return x, y
 
     def __call__(self, obs, batch_size=64, planning_horizon=32, n_guide_steps=2, scale=0.1):
+        # normalize the observations and create batch dimension
         obs = self.normalize(obs, "observations")
         obs = obs[None].repeat(batch_size, axis=0)
+
         conditions = {0: self.to_torch(obs)}
         shape = (batch_size, planning_horizon, self.state_dim + self.action_dim)
+
+        # generate initial noise and apply our conditions (to make the trajectories start at current state)
         x1 = torch.randn(shape, device=self.unet.device)
         x = self.reset_x0(x1, conditions, self.action_dim)
         x = self.to_torch(x)
+
+        # run the diffusion process
        x, y = self.run_diffusion(x, conditions, n_guide_steps, scale)
+
+        # sort output trajectories by value
         sorted_idx = y.argsort(0, descending=True).squeeze()
         sorted_values = x[sorted_idx]
         actions = sorted_values[:, :, : self.action_dim]
         actions = actions.detach().cpu().numpy()
         denorm_actions = self.de_normalize(actions, key="actions")
-        denorm_actions = denorm_actions[0, 0]
+
+        # select the action with the highest value
+        if y is not None:
+            selected_index = 0
+        else:
+            # if we didn't run value guiding, select a random action
+            selected_index = np.random.randint(0, batch_size)
+        denorm_actions = denorm_actions[selected_index, 0]
         return denorm_actions
```
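
A note on the new selection logic: `run_diffusion` returns one value estimate per sampled trajectory, and `__call__` now sorts the batch by that estimate so index 0 holds the highest-value plan. A toy, self-contained illustration of the sorting step (not taken from this commit; shapes and the action width are made up for the example):

```python
import torch

# Toy illustration of the argsort-based selection added above. `y` holds one
# predicted value per trajectory; descending argsort puts the best plan first.
y = torch.tensor([[0.2], [1.5], [0.7]])   # values for 3 sampled trajectories
x = torch.randn(3, 4, 5)                  # (batch, horizon, state+action dims)
sorted_idx = y.argsort(0, descending=True).squeeze()
print(sorted_idx.tolist())                # [1, 2, 0]
best_plan = x[sorted_idx][0]              # highest-value trajectory
first_action = best_plan[0, :2]           # analogous to denorm_actions[0, 0]
```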

examples/community/value_guided_diffuser.py (+11 -2)

```diff
@@ -59,7 +59,6 @@ def run_diffusion(self, x, conditions, n_guide_steps, scale):
         for i in tqdm.tqdm(self.scheduler.timesteps):
             # create batch of timesteps to pass into model
             timesteps = torch.full((batch_size,), i, device=self.unet.device, dtype=torch.long)
-            # 3. call the sample function
             for _ in range(n_guide_steps):
                 with torch.enable_grad():
                     x.requires_grad_()
@@ -76,24 +75,34 @@ def run_diffusion(self, x, conditions, n_guide_steps, scale):
             prev_x = self.unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1)
             x = self.scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"]
 
-            # 4. apply conditions to the trajectory
+            # apply conditions to the trajectory
             x = self.reset_x0(x, conditions, self.action_dim)
             x = self.to_torch(x)
         return x, y
 
     def __call__(self, obs, batch_size=64, planning_horizon=32, n_guide_steps=2, scale=0.1):
+        # normalize the observations and create batch dimension
         obs = self.normalize(obs, "observations")
         obs = obs[None].repeat(batch_size, axis=0)
+
         conditions = {0: self.to_torch(obs)}
         shape = (batch_size, planning_horizon, self.state_dim + self.action_dim)
+
+        # generate initial noise and apply our conditions (to make the trajectories start at current state)
         x1 = torch.randn(shape, device=self.unet.device)
         x = self.reset_x0(x1, conditions, self.action_dim)
         x = self.to_torch(x)
+
+        # run the diffusion process
         x, y = self.run_diffusion(x, conditions, n_guide_steps, scale)
+
+        # sort output trajectories by value
         sorted_idx = y.argsort(0, descending=True).squeeze()
         sorted_values = x[sorted_idx]
         actions = sorted_values[:, :, : self.action_dim]
         actions = actions.detach().cpu().numpy()
         denorm_actions = self.de_normalize(actions, key="actions")
+
+        # select the action with the highest value
         denorm_actions = denorm_actions[0, 0]
         return denorm_actions
```
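
Both pipelines call a `reset_x0` helper that this diff only references. A minimal sketch of what it plausibly does, inferred from the call sites above (the `conditions` dict maps a horizon index to a batch of observations); the slicing is an assumption, not code from this commit:

```python
def reset_x0(x_in, cond, act_dim):
    # Hypothetical sketch inferred from the call sites in this diff: overwrite
    # the observation slice at each conditioned horizon index so that every
    # sampled plan is pinned to the current (observed) state.
    for horizon_index, state in cond.items():
        x_in[:, horizon_index, act_dim:] = state.clone()
    return x_in
```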

examples/diffuser/README.md (+16)

````diff
@@ -0,0 +1,16 @@
+# Overview
+
+These examples show how to run [Diffuser](https://arxiv.org/pdf/2205.09991.pdf) in Diffusers. There are two scripts, `run_diffuser_value_guided.py` and `run_diffuser.py`.
+
+You will need some RL specific requirements to run the examples:
+
+```
+pip install -f https://download.pytorch.org/whl/torch_stable.html \
+                free-mujoco-py \
+                einops \
+                gym \
+                protobuf==3.20.1 \
+                git+https://github.com/rail-berkeley/d4rl.git \
+                mediapy \
+                Pillow==9.0.0
+```
````
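
A quick sanity check that the install above worked, using only imports the example scripts themselves rely on (importing `d4rl` registers the offline-RL environments with `gym`):

```python
import d4rl  # noqa: F401  (registers hopper-medium-v2 and friends with gym)
import gym

env = gym.make("hopper-medium-v2")
print(env.observation_space.shape, env.action_space.shape)  # expect (11,) and (3,)
```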

examples/diffuser/run_diffuser.py (new file, +79)

```diff
@@ -0,0 +1,79 @@
+import d4rl  # noqa
+import gym
+import tqdm
+from diffusers import DDPMScheduler, DiffusionPipeline, UNet1DModel
+
+
+config = dict(
+    n_samples=64,
+    horizon=32,
+    num_inference_steps=20,
+    n_guide_steps=0,
+    scale_grad_by_std=True,
+    scale=0.1,
+    eta=0.0,
+    t_grad_cutoff=2,
+    device="cpu",
+)
+
+
+def _run():
+    env_name = "hopper-medium-v2"
+    env = gym.make(env_name)
+
+    DEVICE = config["device"]
+
+    scheduler = DDPMScheduler(
+        num_train_timesteps=config["num_inference_steps"],
+        beta_schedule="squaredcos_cap_v2",
+        clip_sample=False,
+        variance_type="fixed_small_log",
+    )
+    network = UNet1DModel.from_pretrained("bglick13/hopper-medium-v2-value-function-hor32").to(device=DEVICE).eval()
+    unet = UNet1DModel.from_pretrained("bglick13/hopper-medium-v2-unet-hor32").to(device=DEVICE).eval()
+    pipeline = DiffusionPipeline.from_pretrained(
+        "bglick13/hopper-medium-v2-value-function-hor32",
+        value_function=network,
+        unet=unet,
+        scheduler=scheduler,
+        env=env,
+        custom_pipeline="/Users/bglickenhaus/Documents/diffusers/examples/community",
+    )
+
+    env.seed(0)
+    obs = env.reset()
+    total_reward = 0
+    total_score = 0
+    T = 1000
+    rollout = [obs.copy()]
+    try:
+        for t in tqdm.tqdm(range(T)):
+            # Call the policy
+            denorm_actions = pipeline(obs, planning_horizon=32)
+
+            # execute action in environment
+            next_observation, reward, terminal, _ = env.step(denorm_actions)
+            score = env.get_normalized_score(total_reward)
+            # update return
+            total_reward += reward
+            total_score += score
+            print(
+                f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score:"
+                f" {total_score}"
+            )
+            # save observations for rendering
+            rollout.append(next_observation.copy())
+
+            obs = next_observation
+    except KeyboardInterrupt:
+        pass
+
+    print(f"Total reward: {total_reward}")
+
+
+def run():
+    _run()
+
+
+if __name__ == "__main__":
+    run()
```
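
This script runs with `n_guide_steps=0`, i.e. plain diffusion sampling with no value guidance, so the pipeline falls back to picking a random trajectory from the batch. One small, optional tweak if a GPU is available (the config above hard-codes `device="cpu"`); this is a suggestion, not part of the commit:

```python
import torch

# Optional: pick the device dynamically instead of hard-coding "cpu".
config["device"] = "cuda" if torch.cuda.is_available() else "cpu"
```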

examples/diffuser/run_diffuser_value_guided.py (new file, +83)

```diff
@@ -0,0 +1,83 @@
+import d4rl  # noqa
+import gym
+import tqdm
+from diffusers import DDPMScheduler, DiffusionPipeline, UNet1DModel
+
+
+config = dict(
+    n_samples=64,
+    horizon=32,
+    num_inference_steps=20,
+    n_guide_steps=2,
+    scale_grad_by_std=True,
+    scale=0.1,
+    eta=0.0,
+    t_grad_cutoff=2,
+    device="cpu",
+)
+
+
+def _run():
+    env_name = "hopper-medium-v2"
+    env = gym.make(env_name)
+
+    # Cuda settings for colab
+    # torch.cuda.get_device_name(0)
+    DEVICE = config["device"]
+
+    # Two generators for different parts of the diffusion loop to work in colab
+    scheduler = DDPMScheduler(
+        num_train_timesteps=config["num_inference_steps"],
+        beta_schedule="squaredcos_cap_v2",
+        clip_sample=False,
+        variance_type="fixed_small_log",
+    )
+
+    network = UNet1DModel.from_pretrained("bglick13/hopper-medium-v2-value-function-hor32").to(device=DEVICE).eval()
+    unet = UNet1DModel.from_pretrained("bglick13/hopper-medium-v2-unet-hor32").to(device=DEVICE).eval()
+    pipeline = DiffusionPipeline.from_pretrained(
+        "bglick13/hopper-medium-v2-value-function-hor32",
+        value_function=network,
+        unet=unet,
+        scheduler=scheduler,
+        env=env,
+        custom_pipeline="/Users/bglickenhaus/Documents/diffusers/examples/community",
+    )
+
+    env.seed(0)
+    obs = env.reset()
+    total_reward = 0
+    total_score = 0
+    T = 1000
+    rollout = [obs.copy()]
+    try:
+        for t in tqdm.tqdm(range(T)):
+            # call the policy
+            denorm_actions = pipeline(obs, planning_horizon=32)
+
+            # execute action in environment
+            next_observation, reward, terminal, _ = env.step(denorm_actions)
+            score = env.get_normalized_score(total_reward)
+            # update return
+            total_reward += reward
+            total_score += score
+            print(
+                f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score:"
+                f" {total_score}"
+            )
+            # save observations for rendering
+            rollout.append(next_observation.copy())
+
+            obs = next_observation
+    except KeyboardInterrupt:
+        pass
+
+    print(f"Total reward: {total_reward}")
+
+
+def run():
+    _run()
+
+
+if __name__ == "__main__":
+    run()
```
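
The value-guided script differs from `run_diffuser.py` mainly in `n_guide_steps=2`. The guidance loop itself lives in the pipeline's `run_diffusion`, which the hunks above only show in part; below is a heavily hedged, self-contained sketch of what one guide step plausibly does. The value-function call signature and the `t_grad_cutoff` behavior are assumptions, not code from this commit:

```python
import torch

def guided_step(x, value_function, timesteps, scale=0.1, t_grad_cutoff=2):
    # Hypothetical value-guidance update: score the noisy trajectory batch,
    # then take a gradient-ascent step on the predicted value.
    with torch.enable_grad():
        x.requires_grad_()
        y = value_function(x.permute(0, 2, 1), timesteps)  # assumed (B, C, H) input
        grad = torch.autograd.grad([y.sum()], [x])[0]
    grad[timesteps < t_grad_cutoff] = 0  # assumed: stop guiding near the end
    return (x + scale * grad).detach()
```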

tests/test_models_unet.py (+84 -1)

```diff
@@ -20,7 +20,7 @@
 
 import torch
 
-from diffusers import UNet1DModel, UNet2DConditionModel, UNet2DModel
+from diffusers import UNet1DModel, UNet2DConditionModel, UNet2DModel, ValueFunction
 from diffusers.utils import floats_tensor, slow, torch_device
 
 from .test_modeling_common import ModelTesterMixin
@@ -524,3 +524,86 @@ def test_output_pretrained(self):
     def test_forward_with_norm_groups(self):
         # Not implemented yet for this UNet
         pass
+
+
+class UNetRLModelTests(ModelTesterMixin, unittest.TestCase):
+    model_class = UNet1DModel
+
+    @property
+    def dummy_input(self):
+        batch_size = 4
+        num_features = 14
+        seq_len = 16
+
+        noise = floats_tensor((batch_size, num_features, seq_len)).to(torch_device)
+        time_step = torch.tensor([10] * batch_size).to(torch_device)
+
+        return {"sample": noise, "timestep": time_step}
+
+    @property
+    def input_shape(self):
+        return (4, 14, 16)
+
+    @property
+    def output_shape(self):
+        return (4, 14, 1)
+
+    def test_ema_training(self):
+        pass
+
+    def test_training(self):
+        pass
+
+    def prepare_init_args_and_inputs_for_common(self):
+        init_dict = {
+            "block_out_channels": (32, 64, 128, 256),
+            "in_channels": 14,
+            "out_channels": 14,
+        }
+        inputs_dict = self.dummy_input
+        return init_dict, inputs_dict
+
+    def test_from_pretrained_hub(self):
+        unet, loading_info = UNet1DModel.from_pretrained(
+            "bglick13/hopper-medium-v2-unet-hor32", output_loading_info=True
+        )
+        value_function, vf_loading_info = UNet1DModel.from_pretrained(
+            "bglick13/hopper-medium-v2-value-function-hor32", output_loading_info=True
+        )
+        self.assertIsNotNone(unet)
+        self.assertEqual(len(loading_info["missing_keys"]), 0)
+        self.assertIsNotNone(value_function)
+        self.assertEqual(len(vf_loading_info["missing_keys"]), 0)
+
+        unet.to(torch_device)
+        value_function.to(torch_device)
+        image = value_function(**self.dummy_input)
+
+        assert image is not None, "Make sure output is not None"
+
+    def test_output_pretrained(self):
+        value_function, vf_loading_info = UNet1DModel.from_pretrained(
+            "bglick13/hopper-medium-v2-value-function-hor32", output_loading_info=True
+        )
+        torch.manual_seed(0)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed_all(0)
+
+        num_features = value_function.in_channels
+        seq_len = 14
+        noise = torch.randn((1, seq_len, num_features)).permute(
+            0, 2, 1
+        )  # match original, we can update values and remove
+        time_step = torch.full((num_features,), 0)
+
+        with torch.no_grad():
+            output = value_function(noise, time_step).sample
+
+        # fmt: off
+        expected_output_slice = torch.tensor([207.0272] * seq_len)
+        # fmt: on
+        self.assertTrue(torch.allclose(output, expected_output_slice, rtol=1e-3))
+
+    def test_forward_with_norm_groups(self):
+        # Not implemented yet for this UNet
+        pass
```