From f29ace484b6950daecc1e75578117676f23e0e4e Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Sat, 8 Oct 2022 12:15:04 -0400 Subject: [PATCH 01/32] valuefunction code --- scripts/run_diffuser.py | 121 ++++++++++++++++++++++++++++++++ src/diffusers/models/unet_rl.py | 117 ++++++++++++++++++++++++++++++ 2 files changed, 238 insertions(+) create mode 100644 scripts/run_diffuser.py diff --git a/scripts/run_diffuser.py b/scripts/run_diffuser.py new file mode 100644 index 000000000000..c672421c210c --- /dev/null +++ b/scripts/run_diffuser.py @@ -0,0 +1,121 @@ +import d4rl + +import torch +import tqdm +import numpy as np +import gym + +env_name = "hopper-medium-expert-v2" +env = gym.make(env_name) +data = env.get_dataset() # dataset is only used for normalization in this colab + +# Cuda settings for colab +# torch.cuda.get_device_name(0) +DEVICE = 'cpu' +DTYPE = torch.float + +# diffusion model settings +n_samples = 4 # number of trajectories planned via diffusion +horizon = 128 # length of sampled trajectories +state_dim = env.observation_space.shape[0] +action_dim = env.action_space.shape[0] +num_inference_steps = 100 # number of difusion steps + +def normalize(x_in, data, key): + upper = np.max(data[key], axis=0) + lower = np.min(data[key], axis=0) + x_out = 2*(x_in - lower)/(upper-lower) - 1 + return x_out + +def de_normalize(x_in, data, key): + upper = np.max(data[key], axis=0) + lower = np.min(data[key], axis=0) + x_out = lower + (upper - lower)*(1 + x_in) /2 + return x_out + +def to_torch(x_in, dtype=None, device=None): + dtype = dtype or DTYPE + device = device or DEVICE + if type(x_in) is dict: + return {k: to_torch(v, dtype, device) for k, v in x_in.items()} + elif torch.is_tensor(x_in): + return x_in.to(device).type(dtype) + return torch.tensor(x_in, dtype=dtype, device=device) + +obs = env.reset() +obs_raw = obs + +# normalize observations for forward passes +obs = normalize(obs, data, 'observations') + +from diffusers import DDPMScheduler, TemporalUNet + +# Two generators for different parts of the diffusion loop to work in colab +generator = torch.Generator(device='cuda') +generator_cpu = torch.Generator(device='cpu') + +scheduler = DDPMScheduler(num_train_timesteps=100,beta_schedule="squaredcos_cap_v2") + +# 3 different pretrained models are available for this task. +# The horizion represents the length of trajectories used in training. 
+network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) +# network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) +# network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) +def reset_x0(x_in, cond, act_dim): + for key, val in cond.items(): + x_in[:, key, act_dim:] = val.clone() + return x_in + +# network specific constants for inference +clip_denoised = network.clip_denoised +predict_epsilon = network.predict_epsilon + +## add a batch dimension and repeat for multiple samples +## [ observation_dim ] --> [ n_samples x observation_dim ] +obs = obs[None].repeat(n_samples, axis=0) +conditions = { + 0: to_torch(obs, device=DEVICE) + } + +# constants for inference +batch_size = len(conditions[0]) +shape = (batch_size, horizon, state_dim+action_dim) + +# sample random initial noise vector +x1 = torch.randn(shape, device=DEVICE, generator=generator) + +# this model is conditioned from an initial state, so you will see this function +# multiple times to change the initial state of generated data to the state +# generated via env.reset() above or env.step() below +x = reset_x0(x1, conditions, action_dim) + +# convert a np observation to torch for model forward pass +x = to_torch(x) + +eta = 1.0 # noise factor for sampling reconstructed state + +# run the diffusion process +# for i in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps): +for i in tqdm.tqdm(scheduler.timesteps): + + # create batch of timesteps to pass into model + timesteps = torch.full((batch_size,), i, device=DEVICE, dtype=torch.long) + + # 1. generate prediction from model + with torch.no_grad(): + residual = network(x, timesteps).sample + + # 2. use the model prediction to reconstruct an observation (de-noise) + obs_reconstruct = scheduler.step(residual, i, x, predict_epsilon=predict_epsilon)["prev_sample"] + + # 3. [optional] add posterior noise to the sample + if eta > 0: + noise = torch.randn(obs_reconstruct.shape, generator=generator_cpu).to(obs_reconstruct.device) + posterior_variance = scheduler._get_variance(i) # * noise + # no noise when t == 0 + # NOTE: original implementation missing sqrt on posterior_variance + obs_reconstruct = obs_reconstruct + int(i>0) * (0.5 * posterior_variance) * eta* noise # MJ had as log var, exponentiated + + # 4. 
apply conditions to the trajectory + obs_reconstruct_postcond = reset_x0(obs_reconstruct, conditions, action_dim) + x = to_torch(obs_reconstruct_postcond) \ No newline at end of file diff --git a/src/diffusers/models/unet_rl.py b/src/diffusers/models/unet_rl.py index 420a1661d526..a14489a5734e 100644 --- a/src/diffusers/models/unet_rl.py +++ b/src/diffusers/models/unet_rl.py @@ -175,3 +175,120 @@ def forward( return (sample,) return TemporalUNetOutput(sample=sample) + + +class ValueFunction(ModelMixin, ConfigMixin): + @register_to_config + def __init__( + self, + training_horizon=128, + transition_dim=14, + cond_dim=3, + predict_epsilon=False, + clip_denoised=True, + dim=32, + dim_mults=(1, 4, 8), + out_dim=1, + ): + super().__init__() + + self.transition_dim = transition_dim + self.cond_dim = cond_dim + self.predict_epsilon = predict_epsilon + self.clip_denoised = clip_denoised + + self.time_proj = Timesteps(num_channels=dim, flip_sin_to_cos=False, downscale_freq_shift=1) + self.time_mlp = TimestepEmbedding(channel=dim, time_embed_dim=4 * dim, act_fn="mish", out_dim=dim) + + dims = [transition_dim, *map(lambda m: dim * m, dim_mults)] + in_out = list(zip(dims[:-1], dims[1:])) + + self.blocks = nn.ModuleList([]) + num_resolutions = len(in_out) + + for ind, (dim_in, dim_out) in enumerate(in_out): + is_last = ind >= (num_resolutions - 1) + + self.blocks.append( + nn.ModuleList( + [ + ResidualTemporalBlock(dim_in, dim_out, embed_dim=dim, horizon=training_horizon), + ResidualTemporalBlock(dim_out, dim_out, embed_dim=dim, horizon=training_horizon), + Downsample1D(dim_out, use_conv=True), + ] + ) + ) + + if not is_last: + training_horizon = training_horizon // 2 + + mid_dim = dims[-1] + mid_dim_2 = mid_dim // 2 + mid_dim_3 = mid_dim // 4 + ## + self.mid_block1 = ResidualTemporalBlock(mid_dim, mid_dim_2, embed_dim=dim, horizon=training_horizon) + self.mid_down1 = Downsample1D(mid_dim_2, use_conv=True) + training_horizon = training_horizon // 2 + ## + self.mid_block2 = ResidualTemporalBlock(mid_dim_2, mid_dim_3, embed_dim=dim, horizon=training_horizon) + self.mid_down2 = Downsample1D(mid_dim_3, use_conv=True) + training_horizon = training_horizon // 2 + ## + fc_dim = mid_dim_3 * max(training_horizon, 1) + self.final_block = nn.ModuleList( + nn.Linear(fc_dim + dim, fc_dim // 2), + nn.Mish(), + nn.Linear(fc_dim // 2, out_dim), + ) + + def forward( + self, + sample: torch.FloatTensor, + timestep: Union[torch.Tensor, float, int], + return_dict: bool = True, + ) -> Union[TemporalUNetOutput, Tuple]: + """r + Args: + sample (`torch.FloatTensor`): (batch, horizon, obs_dimension + action_dimension) noisy inputs tensor + timestep (`torch.FloatTensor` or `float` or `int): batch (batch) timesteps + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.unet_2d.UNet2DOutput`] instead of a plain tuple. + + Returns: + [`~models.unet_2d.UNet2DOutput`] or `tuple`: [`~models.unet_2d.UNet2DOutput`] if `return_dict` is True, + otherwise a `tuple`. When returning a tuple, the first element is the sample tensor. + """ + sample = sample.permute(0, 2, 1) + + # 1. time + timesteps = timestep + if not torch.is_tensor(timesteps): + timesteps = torch.tensor([timesteps], dtype=torch.long, device=sample.device) + elif torch.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None].to(sample.device) + + t = self.time_proj(timesteps) + t = self.time_mlp(t) + h = [] + + # 2. 
down + for resnet, resnet2, downsample in self.blocks: + sample = resnet(sample, t) + sample = resnet2(sample, t) + h.append(sample) + sample = downsample(sample) + + # 3. mid + sample = self.mid_block1(sample, t) + sample = self.mid_down1(sample) + sample = self.mid_block2(sample, t) + sample = self.mid_down2(sample) + + sample = sample.view(sample.shape[0], -1) + sample = torch.cat((sample, t), dim=1) + sample = self.final_block(sample) + + if not return_dict: + return (sample,) + + return TemporalUNetOutput(sample=sample) From 1684e8b870549cc04f9596036a97476ab9e4e123 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Sat, 8 Oct 2022 12:26:25 -0400 Subject: [PATCH 02/32] start example scripts --- .../diffuser}/run_diffuser.py | 0 examples/diffuser/train_diffuser.py | 75 +++++++++++++++++++ 2 files changed, 75 insertions(+) rename {scripts => examples/diffuser}/run_diffuser.py (100%) create mode 100644 examples/diffuser/train_diffuser.py diff --git a/scripts/run_diffuser.py b/examples/diffuser/run_diffuser.py similarity index 100% rename from scripts/run_diffuser.py rename to examples/diffuser/run_diffuser.py diff --git a/examples/diffuser/train_diffuser.py b/examples/diffuser/train_diffuser.py new file mode 100644 index 000000000000..902f5ec7357c --- /dev/null +++ b/examples/diffuser/train_diffuser.py @@ -0,0 +1,75 @@ +import d4rl + +import torch +import tqdm +import numpy as np +import gym +from accelerate import Accelerator +env_name = "hopper-medium-expert-v2" +env = gym.make(env_name) +data = env.get_dataset() # dataset is only used for normalization in this colab + +# Cuda settings for colab +# torch.cuda.get_device_name(0) +DEVICE = 'cpu' +DTYPE = torch.float + +# diffusion model settings +n_samples = 4 # number of trajectories planned via diffusion +horizon = 128 # length of sampled trajectories +state_dim = env.observation_space.shape[0] +action_dim = env.action_space.shape[0] +num_inference_steps = 100 # number of difusion steps + +def normalize(x_in, data, key): + upper = np.max(data[key], axis=0) + lower = np.min(data[key], axis=0) + x_out = 2*(x_in - lower)/(upper-lower) - 1 + return x_out + +def de_normalize(x_in, data, key): + upper = np.max(data[key], axis=0) + lower = np.min(data[key], axis=0) + x_out = lower + (upper - lower)*(1 + x_in) /2 + return x_out + +def to_torch(x_in, dtype=None, device=None): + dtype = dtype or DTYPE + device = device or DEVICE + if type(x_in) is dict: + return {k: to_torch(v, dtype, device) for k, v in x_in.items()} + elif torch.is_tensor(x_in): + return x_in.to(device).type(dtype) + return torch.tensor(x_in, dtype=dtype, device=device) + +obs = env.reset() +obs_raw = obs + +# normalize observations for forward passes +obs = normalize(obs, data, 'observations') + +from diffusers import DDPMScheduler, TemporalUNet + +# Two generators for different parts of the diffusion loop to work in colab +generator = torch.Generator(device='cuda') +generator_cpu = torch.Generator(device='cpu') +network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) + +scheduler = DDPMScheduler(num_train_timesteps=100,beta_schedule="squaredcos_cap_v2") +optimizer = torch.optim.AdamW( + network.parameters(), + lr=0.001, + betas=(0.95, 0.99), + weight_decay=1e-6, + eps=1e-8, + ) +# 3 different pretrained models are available for this task. +# The horizion represents the length of trajectories used in training. 
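# (hor128 is loaded above; the commented-out lines below are the hor256 / hor512 variants.)
# A minimal sketch of the training loop that the accelerate TODO below refers to, assuming
# the D4RL transitions are sliced into fixed-length [action, observation] windows; episode
# boundaries and shuffling are ignored for brevity, and the window slicing is illustrative:
obs_norm = normalize(data["observations"], data, "observations")
act_norm = normalize(data["actions"], data, "actions")
joined = np.concatenate([act_norm, obs_norm], axis=-1)  # (N, action_dim + state_dim)
windows = np.stack([joined[i : i + horizon] for i in range(0, len(joined) - horizon, horizon)])
trajectories = to_torch(windows)

for batch in trajectories.split(32):
    noise = torch.randn_like(batch)
    steps = torch.randint(0, scheduler.config.num_train_timesteps, (batch.shape[0],), device=DEVICE)
    noisy = scheduler.add_noise(batch, noise, steps)
    pred = network(noisy, steps).sample
    # epsilon- vs. sample-prediction target, depending on how the checkpoint was trained
    target = noise if network.predict_epsilon else batch
    loss = torch.nn.functional.mse_loss(pred, target)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()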
+# network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) +# network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) +def reset_x0(x_in, cond, act_dim): + for key, val in cond.items(): + x_in[:, key, act_dim:] = val.clone() + return x_in + +# TODO: Flesh this out using accelerate library (a la other examples) \ No newline at end of file From c7579858272b57c564b8fedc94cbc8b31ff91752 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Sat, 8 Oct 2022 12:46:34 -0400 Subject: [PATCH 03/32] missing imports --- src/diffusers/__init__.py | 2 +- src/diffusers/models/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index e6b920a31b4c..d3419860d48d 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -18,7 +18,7 @@ if is_torch_available(): from .modeling_utils import ModelMixin - from .models import AutoencoderKL, TemporalUNet, UNet2DConditionModel, UNet2DModel, VQModel + from .models import AutoencoderKL, TemporalUNet, UNet2DConditionModel, UNet2DModel, VQModel, ValueFunction from .optimization import ( get_constant_schedule, get_constant_schedule_with_warmup, diff --git a/src/diffusers/models/__init__.py b/src/diffusers/models/__init__.py index 47f7fa71682b..4bedc43e9007 100644 --- a/src/diffusers/models/__init__.py +++ b/src/diffusers/models/__init__.py @@ -18,7 +18,7 @@ if is_torch_available(): from .unet_2d import UNet2DModel from .unet_2d_condition import UNet2DConditionModel - from .unet_rl import TemporalUNet + from .unet_rl import TemporalUNet, ValueFunction from .vae import AutoencoderKL, VQModel if is_flax_available(): From b3159182e97790c54baae5c5f6d4d45cf95b9c18 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Sat, 8 Oct 2022 12:58:37 -0400 Subject: [PATCH 04/32] bug fixes and placeholder example script --- .../diffuser/run_diffuser_value_guided.py | 121 ++++++++++++++++++ src/diffusers/models/unet_rl.py | 12 +- 2 files changed, 127 insertions(+), 6 deletions(-) create mode 100644 examples/diffuser/run_diffuser_value_guided.py diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py new file mode 100644 index 000000000000..a18678ee4b9a --- /dev/null +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -0,0 +1,121 @@ +import d4rl + +import torch +import tqdm +import numpy as np +import gym + +env_name = "hopper-medium-expert-v2" +env = gym.make(env_name) +data = env.get_dataset() # dataset is only used for normalization in this colab + +# Cuda settings for colab +# torch.cuda.get_device_name(0) +DEVICE = 'cpu' +DTYPE = torch.float + +# diffusion model settings +n_samples = 4 # number of trajectories planned via diffusion +horizon = 128 # length of sampled trajectories +state_dim = env.observation_space.shape[0] +action_dim = env.action_space.shape[0] +num_inference_steps = 100 # number of difusion steps + +def normalize(x_in, data, key): + upper = np.max(data[key], axis=0) + lower = np.min(data[key], axis=0) + x_out = 2*(x_in - lower)/(upper-lower) - 1 + return x_out + +def de_normalize(x_in, data, key): + upper = np.max(data[key], axis=0) + lower = np.min(data[key], axis=0) + x_out = lower + (upper - lower)*(1 + x_in) /2 + return x_out + +def to_torch(x_in, dtype=None, device=None): + dtype = dtype or DTYPE + device = device or DEVICE + if type(x_in) is dict: + return {k: to_torch(v, dtype, device) for k, v in x_in.items()} + elif 
torch.is_tensor(x_in): + return x_in.to(device).type(dtype) + return torch.tensor(x_in, dtype=dtype, device=device) + +obs = env.reset() +obs_raw = obs + +# normalize observations for forward passes +obs = normalize(obs, data, 'observations') + +from diffusers import DDPMScheduler, TemporalUNet, ValueFunction + +# Two generators for different parts of the diffusion loop to work in colab +# generator = torch.Generator(device='cuda') +generator_cpu = torch.Generator(device='cpu') + +scheduler = DDPMScheduler(num_train_timesteps=100,beta_schedule="squaredcos_cap_v2") + +# 3 different pretrained models are available for this task. +# The horizion represents the length of trajectories used in training. +network = ValueFunction.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) +# network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) +# network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) +def reset_x0(x_in, cond, act_dim): + for key, val in cond.items(): + x_in[:, key, act_dim:] = val.clone() + return x_in + +# network specific constants for inference +clip_denoised = network.clip_denoised +predict_epsilon = network.predict_epsilon + +## add a batch dimension and repeat for multiple samples +## [ observation_dim ] --> [ n_samples x observation_dim ] +obs = obs[None].repeat(n_samples, axis=0) +conditions = { + 0: to_torch(obs, device=DEVICE) + } + +# constants for inference +batch_size = len(conditions[0]) +shape = (batch_size, horizon, state_dim+action_dim) + +# sample random initial noise vector +x1 = torch.randn(shape, device=DEVICE, generator=generator) + +# this model is conditioned from an initial state, so you will see this function +# multiple times to change the initial state of generated data to the state +# generated via env.reset() above or env.step() below +x = reset_x0(x1, conditions, action_dim) + +# convert a np observation to torch for model forward pass +x = to_torch(x) + +eta = 1.0 # noise factor for sampling reconstructed state + +# run the diffusion process +# for i in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps): +for i in tqdm.tqdm(scheduler.timesteps): + + # create batch of timesteps to pass into model + timesteps = torch.full((batch_size,), i, device=DEVICE, dtype=torch.long) + + # 1. generate prediction from model + with torch.no_grad(): + residual = network(x, timesteps).sample + + # 2. use the model prediction to reconstruct an observation (de-noise) + obs_reconstruct = scheduler.step(residual, i, x, predict_epsilon=predict_epsilon)["prev_sample"] + + # 3. [optional] add posterior noise to the sample + if eta > 0: + noise = torch.randn(obs_reconstruct.shape, generator=generator_cpu).to(obs_reconstruct.device) + posterior_variance = scheduler._get_variance(i) # * noise + # no noise when t == 0 + # NOTE: original implementation missing sqrt on posterior_variance + obs_reconstruct = obs_reconstruct + int(i>0) * (0.5 * posterior_variance) * eta* noise # MJ had as log var, exponentiated + + # 4. 
apply conditions to the trajectory + obs_reconstruct_postcond = reset_x0(obs_reconstruct, conditions, action_dim) + x = to_torch(obs_reconstruct_postcond) \ No newline at end of file diff --git a/src/diffusers/models/unet_rl.py b/src/diffusers/models/unet_rl.py index a14489a5734e..7fcade5b79c1 100644 --- a/src/diffusers/models/unet_rl.py +++ b/src/diffusers/models/unet_rl.py @@ -212,8 +212,8 @@ def __init__( self.blocks.append( nn.ModuleList( [ - ResidualTemporalBlock(dim_in, dim_out, embed_dim=dim, horizon=training_horizon), - ResidualTemporalBlock(dim_out, dim_out, embed_dim=dim, horizon=training_horizon), + ResidualTemporalBlock(dim_in, dim_out, embed_dim=dim), + ResidualTemporalBlock(dim_out, dim_out, embed_dim=dim), Downsample1D(dim_out, use_conv=True), ] ) @@ -226,19 +226,19 @@ def __init__( mid_dim_2 = mid_dim // 2 mid_dim_3 = mid_dim // 4 ## - self.mid_block1 = ResidualTemporalBlock(mid_dim, mid_dim_2, embed_dim=dim, horizon=training_horizon) + self.mid_block1 = ResidualTemporalBlock(mid_dim, mid_dim_2, embed_dim=dim) self.mid_down1 = Downsample1D(mid_dim_2, use_conv=True) training_horizon = training_horizon // 2 ## - self.mid_block2 = ResidualTemporalBlock(mid_dim_2, mid_dim_3, embed_dim=dim, horizon=training_horizon) + self.mid_block2 = ResidualTemporalBlock(mid_dim_2, mid_dim_3, embed_dim=dim) self.mid_down2 = Downsample1D(mid_dim_3, use_conv=True) training_horizon = training_horizon // 2 ## fc_dim = mid_dim_3 * max(training_horizon, 1) - self.final_block = nn.ModuleList( + self.final_block = nn.ModuleList([ nn.Linear(fc_dim + dim, fc_dim // 2), nn.Mish(), - nn.Linear(fc_dim // 2, out_dim), + nn.Linear(fc_dim // 2, out_dim),] ) def forward( From f01c014f837f77587c51fe6f4783cf4b142e7943 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Sun, 9 Oct 2022 14:15:20 -0400 Subject: [PATCH 05/32] add value function scheduler --- .../diffuser/run_diffuser_value_guided.py | 29 +- src/diffusers/__init__.py | 1 + src/diffusers/models/unet_rl.py | 5 +- src/diffusers/schedulers/__init__.py | 1 + .../schedulers/scheduling_value_function.py | 299 ++++++++++++++++++ 5 files changed, 325 insertions(+), 10 deletions(-) create mode 100644 src/diffusers/schedulers/scheduling_value_function.py diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index a18678ee4b9a..3093abffb55e 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -4,6 +4,13 @@ import tqdm import numpy as np import gym +from diffusers import DDPMScheduler, TemporalUNet, ValueFunction, ValueFunctionScheduler + + +# model = torch.load("../diffuser/test.torch") +# hf_value_function = ValueFunction(training_horizon=32, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) +# hf_value_function.load_state_dict(model.state_dict()) +# hf_value_function.to_hub("bglick13/hf_value_function") env_name = "hopper-medium-expert-v2" env = gym.make(env_name) @@ -16,7 +23,7 @@ # diffusion model settings n_samples = 4 # number of trajectories planned via diffusion -horizon = 128 # length of sampled trajectories +horizon = 32 # length of sampled trajectories state_dim = env.observation_space.shape[0] action_dim = env.action_space.shape[0] num_inference_steps = 100 # number of difusion steps @@ -48,17 +55,18 @@ def to_torch(x_in, dtype=None, device=None): # normalize observations for forward passes obs = normalize(obs, data, 'observations') -from diffusers import DDPMScheduler, TemporalUNet, ValueFunction # Two generators for 
different parts of the diffusion loop to work in colab # generator = torch.Generator(device='cuda') generator_cpu = torch.Generator(device='cpu') -scheduler = DDPMScheduler(num_train_timesteps=100,beta_schedule="squaredcos_cap_v2") +scheduler = ValueFunctionScheduler(num_train_timesteps=100,beta_schedule="squaredcos_cap_v2", clip_sample=False) # 3 different pretrained models are available for this task. # The horizion represents the length of trajectories used in training. -network = ValueFunction.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) +network = ValueFunction(training_horizon=horizon, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) + +# network = ValueFunction.from_pretrained("/Users/bglickenhaus/Documents/diffuser/logs/hopper-medium-v2/values/defaults_H32_T20_d0.997").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) def reset_x0(x_in, cond, act_dim): @@ -82,7 +90,7 @@ def reset_x0(x_in, cond, act_dim): shape = (batch_size, horizon, state_dim+action_dim) # sample random initial noise vector -x1 = torch.randn(shape, device=DEVICE, generator=generator) +x1 = torch.randn(shape, device=DEVICE, generator=generator_cpu) # this model is conditioned from an initial state, so you will see this function # multiple times to change the initial state of generated data to the state @@ -102,11 +110,16 @@ def reset_x0(x_in, cond, act_dim): timesteps = torch.full((batch_size,), i, device=DEVICE, dtype=torch.long) # 1. generate prediction from model - with torch.no_grad(): - residual = network(x, timesteps).sample + with torch.enable_grad(): + x.requires_grad_() + y = network(x, timesteps).sample + grad = torch.autograd.grad([y.sum()], [x])[0] + # tile to (batch_size, 128, 14) + x.detach() + pass # 2. use the model prediction to reconstruct an observation (de-noise) - obs_reconstruct = scheduler.step(residual, i, x, predict_epsilon=predict_epsilon)["prev_sample"] + obs_reconstruct = scheduler.step(grad, i, x, predict_epsilon=predict_epsilon)["prev_sample"] # 3. 
[optional] add posterior noise to the sample if eta > 0: diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index d3419860d48d..664b62dd29c9 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -37,6 +37,7 @@ PNDMScheduler, SchedulerMixin, ScoreSdeVeScheduler, + ValueFunctionScheduler ) from .training_utils import EMAModel else: diff --git a/src/diffusers/models/unet_rl.py b/src/diffusers/models/unet_rl.py index 7fcade5b79c1..8b0ee02966c2 100644 --- a/src/diffusers/models/unet_rl.py +++ b/src/diffusers/models/unet_rl.py @@ -285,8 +285,9 @@ def forward( sample = self.mid_down2(sample) sample = sample.view(sample.shape[0], -1) - sample = torch.cat((sample, t), dim=1) - sample = self.final_block(sample) + sample = torch.cat((sample, t), dim=-1) + for layer in self.final_block: + sample = layer(sample) if not return_dict: return (sample,) diff --git a/src/diffusers/schedulers/__init__.py b/src/diffusers/schedulers/__init__.py index a906c39eb24c..c4770de538cc 100644 --- a/src/diffusers/schedulers/__init__.py +++ b/src/diffusers/schedulers/__init__.py @@ -24,6 +24,7 @@ from .scheduling_sde_ve import ScoreSdeVeScheduler from .scheduling_sde_vp import ScoreSdeVpScheduler from .scheduling_utils import SchedulerMixin + from .scheduling_value_function import ValueFunctionScheduler else: from ..utils.dummy_pt_objects import * # noqa F403 diff --git a/src/diffusers/schedulers/scheduling_value_function.py b/src/diffusers/schedulers/scheduling_value_function.py new file mode 100644 index 000000000000..bf2fe3ec0412 --- /dev/null +++ b/src/diffusers/schedulers/scheduling_value_function.py @@ -0,0 +1,299 @@ +# Copyright 2022 UC Berkeley Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim + +import math +import warnings +from dataclasses import dataclass +from typing import Optional, Tuple, Union + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from .scheduling_utils import SchedulerMixin + + +@dataclass +class ValueFunctionSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's step function output. + + Args: + prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample (x_{0}) based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. 
+ """ + + prev_sample: torch.FloatTensor + pred_original_sample: Optional[torch.FloatTensor] = None + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + + Args: + num_diffusion_timesteps (`int`): the number of betas to produce. + max_beta (`float`): the maximum beta to use; use values lower than 1 to + prevent singularities. + + Returns: + betas (`np.ndarray`): the betas used by the scheduler to step the model outputs + """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class ValueFunctionScheduler(SchedulerMixin, ConfigMixin): + """ + Denoising diffusion probabilistic models (DDPMs) explores the connections between denoising score matching and + Langevin dynamics sampling. + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and + [`~ConfigMixin.from_config`] functions. + + For more details, see the original paper: https://arxiv.org/abs/2006.11239 + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + beta_start (`float`): the starting `beta` value of inference. + beta_end (`float`): the final `beta` value. + beta_schedule (`str`): + the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, optional): + option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. + variance_type (`str`): + options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`, + `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. + clip_sample (`bool`, default `True`): + option to clip predicted sample between -1 and 1 for numerical stability. + + """ + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: Optional[np.ndarray] = None, + variance_type: str = "fixed_small", + clip_sample: bool = True, + **kwargs, + ): + if "tensor_format" in kwargs: + warnings.warn( + "`tensor_format` is deprecated as an argument and will be removed in version `0.5.0`." + "If you're running your code in PyTorch, you can safely remove this argument.", + DeprecationWarning, + ) + + if trained_betas is not None: + self.betas = torch.from_numpy(trained_betas) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. 
+ self.betas = ( + torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + self.one = torch.tensor(1.0) + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1] + + self.variance_type = variance_type + + def set_timesteps(self, num_inference_steps: int): + """ + Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + """ + num_inference_steps = min(self.config.num_train_timesteps, num_inference_steps) + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange( + 0, self.config.num_train_timesteps, self.config.num_train_timesteps // self.num_inference_steps + )[::-1] + + def _get_variance(self, t, predicted_variance=None, variance_type=None): + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one + + # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://arxiv.org/pdf/2006.11239.pdf) + # and sample from it to get previous sample + # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample + variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * self.betas[t] + + if variance_type is None: + variance_type = self.config.variance_type + + # hacks - were probably added for training stability + if variance_type == "fixed_small": + variance = torch.clamp(variance, min=1e-20) + # for rl-diffuser https://arxiv.org/abs/2205.09991 + elif variance_type == "fixed_small_log": + variance = torch.log(torch.clamp(variance, min=1e-20)) + elif variance_type == "fixed_large": + variance = self.betas[t] + elif variance_type == "fixed_large_log": + # Glide max_log + variance = torch.log(self.betas[t]) + elif variance_type == "learned": + return predicted_variance + elif variance_type == "learned_range": + min_log = variance + max_log = self.betas[t] + frac = (predicted_variance + 1) / 2 + variance = frac * max_log + (1 - frac) * min_log + + return variance + + def step( + self, + model_output: torch.FloatTensor, + timestep: int, + sample: torch.FloatTensor, + predict_epsilon=True, + generator=None, + scale=0.001, + return_dict: bool = True, + ) -> Union[ValueFunctionSchedulerOutput, Tuple]: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.FloatTensor`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + current instance of sample being created by diffusion process. + predict_epsilon (`bool`): + optional flag to use when model predicts the samples directly instead of the noise, epsilon. + generator: random number generator. 
+ return_dict (`bool`): option for returning tuple rather than DDPMSchedulerOutput class + + Returns: + [`~schedulers.scheduling_utils.DDPMSchedulerOutput`] or `tuple`: + [`~schedulers.scheduling_utils.DDPMSchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When + returning a tuple, the first element is the sample tensor. + + """ + t = timestep + + if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]: + model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1) + else: + predicted_variance = None + + # 1. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # 2. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf + + pred_original_sample = sample + scale * model_output + + # 3. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = torch.clamp(pred_original_sample, -1, 1) + + # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t + # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf + pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * self.betas[t]) / beta_prod_t + current_sample_coeff = self.alphas[t] ** (0.5) * beta_prod_t_prev / beta_prod_t + + # 5. Compute predicted previous sample µ_t + # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf + pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample + + # 6. Add noise + variance = 0 + if t > 0: + noise = torch.randn( + model_output.size(), dtype=model_output.dtype, layout=model_output.layout, generator=generator + ).to(model_output.device) + variance = (self._get_variance(t, predicted_variance=predicted_variance) ** 0.5) * noise + + pred_prev_sample = pred_prev_sample + variance + + if not return_dict: + return (pred_prev_sample,) + + return ValueFunctionSchedulerOutput(prev_sample=pred_prev_sample, pred_original_sample=pred_original_sample) + + def add_noise( + self, + original_samples: torch.FloatTensor, + noise: torch.FloatTensor, + timesteps: torch.IntTensor, + ) -> torch.FloatTensor: + if self.alphas_cumprod.device != original_samples.device: + self.alphas_cumprod = self.alphas_cumprod.to(original_samples.device) + + if timesteps.device != original_samples.device: + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = self.alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps From 7b60c9322e12304bc4109b4ba9a4d6d35bd84ff7 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Sun, 9 Oct 2022 18:47:50 -0400 Subject: [PATCH 06/32] load value function from hub and get best actions in example --- examples/diffuser/run_diffuser_value_guided.py | 13 +++++++++---- 
.../schedulers/scheduling_value_function.py | 1 - 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 3093abffb55e..91ff0f2bced9 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -64,9 +64,9 @@ def to_torch(x_in, dtype=None, device=None): # 3 different pretrained models are available for this task. # The horizion represents the length of trajectories used in training. -network = ValueFunction(training_horizon=horizon, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) +# network = ValueFunction(training_horizon=horizon, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) -# network = ValueFunction.from_pretrained("/Users/bglickenhaus/Documents/diffuser/logs/hopper-medium-v2/values/defaults_H32_T20_d0.997").to(device=DEVICE) +network = ValueFunction.from_pretrained("bglick13/hopper-medium-expert-v2-value-function-hor32").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) def reset_x0(x_in, cond, act_dim): @@ -119,7 +119,7 @@ def reset_x0(x_in, cond, act_dim): pass # 2. use the model prediction to reconstruct an observation (de-noise) - obs_reconstruct = scheduler.step(grad, i, x, predict_epsilon=predict_epsilon)["prev_sample"] + obs_reconstruct = scheduler.step(grad, i, x)["prev_sample"] # 3. [optional] add posterior noise to the sample if eta > 0: @@ -131,4 +131,9 @@ def reset_x0(x_in, cond, act_dim): # 4. apply conditions to the trajectory obs_reconstruct_postcond = reset_x0(obs_reconstruct, conditions, action_dim) - x = to_torch(obs_reconstruct_postcond) \ No newline at end of file + x = to_torch(obs_reconstruct_postcond) +sorted_idx = y.argsort(-1, descending=True).squeeze() +sorted_values = x[sorted_idx] +actions = sorted_values[:, :, :action_dim] +actions = de_normalize(actions[0, 0].detach().numpy(), data, key='actions') +obs, reward, is_done, info = env.step(actions) diff --git a/src/diffusers/schedulers/scheduling_value_function.py b/src/diffusers/schedulers/scheduling_value_function.py index bf2fe3ec0412..fc56a2b18a9f 100644 --- a/src/diffusers/schedulers/scheduling_value_function.py +++ b/src/diffusers/schedulers/scheduling_value_function.py @@ -199,7 +199,6 @@ def step( model_output: torch.FloatTensor, timestep: int, sample: torch.FloatTensor, - predict_epsilon=True, generator=None, scale=0.001, return_dict: bool = True, From 0de435e0a12e8500474e4803748b215188ff0f14 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Mon, 10 Oct 2022 15:20:09 -0400 Subject: [PATCH 07/32] very close to working example --- examples/diffuser/helpers.py | 186 ++++++++++++++++++ .../diffuser/run_diffuser_value_guided.py | 170 +++++++++------- .../schedulers/scheduling_value_function.py | 4 +- 3 files changed, 292 insertions(+), 68 deletions(-) create mode 100644 examples/diffuser/helpers.py diff --git a/examples/diffuser/helpers.py b/examples/diffuser/helpers.py new file mode 100644 index 000000000000..ef853d3a6039 --- /dev/null +++ b/examples/diffuser/helpers.py @@ -0,0 +1,186 @@ +import os +import mediapy as media +import numpy as np +import torch +import gym +import warnings +def to_np(x_in): + if torch.is_tensor(x_in): + x_in = x_in.detach().cpu().numpy() + return x_in + +# from MJ's Diffuser code +# 
https://github.com/jannerm/diffuser/blob/76ae49ae85ba1c833bf78438faffdc63b8b4d55d/diffuser/utils/colab.py#L79 +def mkdir(savepath): + """ + returns `True` iff `savepath` is created + """ + if not os.path.exists(savepath): + os.makedirs(savepath) + return True + else: + return False + + +def show_sample(renderer, observations, filename='sample.mp4', savebase='videos'): + ''' + observations : [ batch_size x horizon x observation_dim ] + ''' + + mkdir(savebase) + savepath = os.path.join(savebase, filename) + + images = [] + for rollout in observations: + ## [ horizon x height x width x channels ] + img = renderer._renders(rollout, partial=True) + images.append(img) + + ## [ horizon x height x (batch_size * width) x channels ] + images = np.concatenate(images, axis=2) + media.write_video(savepath, images, fps=60) + media.show_video(images, codec='h264', fps=60) + +# Code adapted from Michael Janner +# source: https://github.com/jannerm/diffuser/blob/main/diffuser/utils/rendering.py +import mujoco_py as mjc + +def env_map(env_name): + ''' + map D4RL dataset names to custom fully-observed + variants for rendering + ''' + if 'halfcheetah' in env_name: + return 'HalfCheetahFullObs-v2' + elif 'hopper' in env_name: + return 'HopperFullObs-v2' + elif 'walker2d' in env_name: + return 'Walker2dFullObs-v2' + else: + return env_name + +def get_image_mask(img): + background = (img == 255).all(axis=-1, keepdims=True) + mask = ~background.repeat(3, axis=-1) + return mask + +def atmost_2d(x): + while x.ndim > 2: + x = x.squeeze(0) + return x + +def set_state(env, state): + qpos_dim = env.sim.data.qpos.size + qvel_dim = env.sim.data.qvel.size + if not state.size == qpos_dim + qvel_dim: + warnings.warn( + f'[ utils/rendering ] Expected state of size {qpos_dim + qvel_dim}, ' + f'but got state of size {state.size}') + state = state[:qpos_dim + qvel_dim] + + env.set_state(state[:qpos_dim], state[qpos_dim:]) + +class MuJoCoRenderer: + ''' + default mujoco renderer + ''' + + def __init__(self, env): + if type(env) is str: + env = env_map(env) + self.env = gym.make(env) + else: + self.env = env + ## - 1 because the envs in renderer are fully-observed + ## @TODO : clean up + self.observation_dim = np.prod(self.env.observation_space.shape) - 1 + self.action_dim = np.prod(self.env.action_space.shape) + try: + self.viewer = mjc.MjRenderContextOffscreen(self.env.sim) + except: + print('[ utils/rendering ] Warning: could not initialize offscreen renderer') + self.viewer = None + + def pad_observation(self, observation): + state = np.concatenate([ + np.zeros(1), + observation, + ]) + return state + + def pad_observations(self, observations): + qpos_dim = self.env.sim.data.qpos.size + ## xpos is hidden + xvel_dim = qpos_dim - 1 + xvel = observations[:, xvel_dim] + xpos = np.cumsum(xvel) * self.env.dt + states = np.concatenate([ + xpos[:,None], + observations, + ], axis=-1) + return states + + def render(self, observation, dim=256, partial=False, qvel=True, render_kwargs=None, conditions=None): + + if type(dim) == int: + dim = (dim, dim) + + if self.viewer is None: + return np.zeros((*dim, 3), np.uint8) + + if render_kwargs is None: + xpos = observation[0] if not partial else 0 + render_kwargs = { + 'trackbodyid': 2, + 'distance': 3, + 'lookat': [xpos, -0.5, 1], + 'elevation': -20 + } + + for key, val in render_kwargs.items(): + if key == 'lookat': + self.viewer.cam.lookat[:] = val[:] + else: + setattr(self.viewer.cam, key, val) + + if partial: + state = self.pad_observation(observation) + else: + state = observation + + 
qpos_dim = self.env.sim.data.qpos.size + if not qvel or state.shape[-1] == qpos_dim: + qvel_dim = self.env.sim.data.qvel.size + state = np.concatenate([state, np.zeros(qvel_dim)]) + + set_state(self.env, state) + + self.viewer.render(*dim) + data = self.viewer.read_pixels(*dim, depth=False) + data = data[::-1, :, :] + return data + + def _renders(self, observations, **kwargs): + images = [] + for observation in observations: + img = self.render(observation, **kwargs) + images.append(img) + return np.stack(images, axis=0) + + def renders(self, samples, partial=False, **kwargs): + if partial: + samples = self.pad_observations(samples) + partial = False + + sample_images = self._renders(samples, partial=partial, **kwargs) + + composite = np.ones_like(sample_images[0]) * 255 + + for img in sample_images: + mask = get_image_mask(img) + composite[mask] = img[mask] + + return composite + + def __call__(self, *args, **kwargs): + return self.renders(*args, **kwargs) \ No newline at end of file diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 91ff0f2bced9..41bc977a06f4 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -5,6 +5,7 @@ import numpy as np import gym from diffusers import DDPMScheduler, TemporalUNet, ValueFunction, ValueFunctionScheduler +from helpers import MuJoCoRenderer, show_sample # model = torch.load("../diffuser/test.torch") @@ -49,24 +50,20 @@ def to_torch(x_in, dtype=None, device=None): return x_in.to(device).type(dtype) return torch.tensor(x_in, dtype=dtype, device=device) -obs = env.reset() -obs_raw = obs - -# normalize observations for forward passes -obs = normalize(obs, data, 'observations') # Two generators for different parts of the diffusion loop to work in colab # generator = torch.Generator(device='cuda') generator_cpu = torch.Generator(device='cpu') -scheduler = ValueFunctionScheduler(num_train_timesteps=100,beta_schedule="squaredcos_cap_v2", clip_sample=False) +scheduler = ValueFunctionScheduler(num_train_timesteps=20,beta_schedule="squaredcos_cap_v2", clip_sample=False) # 3 different pretrained models are available for this task. # The horizion represents the length of trajectories used in training. 
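# Value guidance below works in the style of classifier guidance: at every denoising step
# the candidate trajectories are nudged a few gradient steps uphill on the value network's
# predicted return (scaled by the posterior variance when scale_grad_by_std is set, and
# switched off for the last few timesteps), the first transition is re-pinned to the current
# observation with reset_x0, and only then do the unet and the scheduler produce the next,
# less noisy sample. After the loop, plans are ranked by their predicted value and the first
# action of the best plan is executed in the environment.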
# network = ValueFunction(training_horizon=horizon, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) network = ValueFunction.from_pretrained("bglick13/hopper-medium-expert-v2-value-function-hor32").to(device=DEVICE) +unet = TemporalUNet.from_pretrained("bglick13/hopper-medium-expert-v2-unet-hor32").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) def reset_x0(x_in, cond, act_dim): @@ -75,65 +72,106 @@ def reset_x0(x_in, cond, act_dim): return x_in # network specific constants for inference -clip_denoised = network.clip_denoised -predict_epsilon = network.predict_epsilon - +clip_denoised = False +predict_epsilon = False +n_guide_steps = 2 +scale_grad_by_std = True +scale = 0.001 ## add a batch dimension and repeat for multiple samples ## [ observation_dim ] --> [ n_samples x observation_dim ] -obs = obs[None].repeat(n_samples, axis=0) -conditions = { - 0: to_torch(obs, device=DEVICE) - } - -# constants for inference -batch_size = len(conditions[0]) -shape = (batch_size, horizon, state_dim+action_dim) - -# sample random initial noise vector -x1 = torch.randn(shape, device=DEVICE, generator=generator_cpu) - -# this model is conditioned from an initial state, so you will see this function -# multiple times to change the initial state of generated data to the state -# generated via env.reset() above or env.step() below -x = reset_x0(x1, conditions, action_dim) - -# convert a np observation to torch for model forward pass -x = to_torch(x) - -eta = 1.0 # noise factor for sampling reconstructed state - -# run the diffusion process -# for i in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps): -for i in tqdm.tqdm(scheduler.timesteps): - - # create batch of timesteps to pass into model - timesteps = torch.full((batch_size,), i, device=DEVICE, dtype=torch.long) - - # 1. generate prediction from model - with torch.enable_grad(): - x.requires_grad_() - y = network(x, timesteps).sample - grad = torch.autograd.grad([y.sum()], [x])[0] - # tile to (batch_size, 128, 14) - x.detach() - pass - - # 2. use the model prediction to reconstruct an observation (de-noise) - obs_reconstruct = scheduler.step(grad, i, x)["prev_sample"] - - # 3. [optional] add posterior noise to the sample - if eta > 0: - noise = torch.randn(obs_reconstruct.shape, generator=generator_cpu).to(obs_reconstruct.device) - posterior_variance = scheduler._get_variance(i) # * noise - # no noise when t == 0 - # NOTE: original implementation missing sqrt on posterior_variance - obs_reconstruct = obs_reconstruct + int(i>0) * (0.5 * posterior_variance) * eta* noise # MJ had as log var, exponentiated - - # 4. apply conditions to the trajectory - obs_reconstruct_postcond = reset_x0(obs_reconstruct, conditions, action_dim) - x = to_torch(obs_reconstruct_postcond) -sorted_idx = y.argsort(-1, descending=True).squeeze() -sorted_values = x[sorted_idx] -actions = sorted_values[:, :, :action_dim] -actions = de_normalize(actions[0, 0].detach().numpy(), data, key='actions') -obs, reward, is_done, info = env.step(actions) +obs = env.reset() +total_reward = 0 +done = False +T = 300 +rollout = [obs.copy()] +try: + for t in tqdm.tqdm(range(T)): + obs_raw = obs + # 1. 
Call the policy + # normalize observations for forward passes + obs = normalize(obs, data, 'observations') + + obs = obs[None].repeat(n_samples, axis=0) + conditions = { + 0: to_torch(obs, device=DEVICE) + } + + # 2. Call the diffusion model + # constants for inference + batch_size = len(conditions[0]) + shape = (batch_size, horizon, state_dim+action_dim) + + # sample random initial noise vector + x1 = torch.randn(shape, device=DEVICE, generator=generator_cpu) + + # this model is conditioned from an initial state, so you will see this function + # multiple times to change the initial state of generated data to the state + # generated via env.reset() above or env.step() below + x = reset_x0(x1, conditions, action_dim) + + # convert a np observation to torch for model forward pass + x = to_torch(x) + + eta = 1.0 # noise factor for sampling reconstructed state + + # run the diffusion process + # for i in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps): + for i in tqdm.tqdm(scheduler.timesteps): + + # create batch of timesteps to pass into model + timesteps = torch.full((batch_size,), i, device=DEVICE, dtype=torch.long) + + # 3. call the sample function + for _ in range(n_guide_steps): + with torch.enable_grad(): + x.requires_grad_() + y = network(x, timesteps).sample + grad = torch.autograd.grad([y.sum()], [x])[0] + if scale_grad_by_std: + posterior_variance = scheduler._get_variance(i) + grad = posterior_variance * 0.5 * grad + grad[i < 4] = 0 + x = x.detach() + x = x + scale * grad + x = reset_x0(x, conditions, action_dim) + prev_x = unet(x, timesteps).sample + # TODO: This should really be a TemporalUnet that predicts previos state given x + x = scheduler.step(prev_x, i, x)["prev_sample"] + x = reset_x0(x, conditions, action_dim) + if clip_denoised: + x.clamp_(-1., 1.) + # 2. use the model prediction to reconstruct an observation (de-noise) + + + # # 3. [optional] add posterior noise to the sample + # if eta > 0: + # noise = torch.randn(obs_reconstruct.shape, generator=generator_cpu).to(obs_reconstruct.device) + # posterior_variance = scheduler._get_variance(i) # * noise + # # no noise when t == 0 + # # NOTE: original implementation missing sqrt on posterior_variance + # obs_reconstruct = obs_reconstruct + int(i>0) * (0.5 * posterior_variance) * eta* noise # MJ had as log var, exponentiated + + # 4. 
apply conditions to the trajectory + # obs_reconstruct_postcond = reset_x0(obs_reconstruct, conditions, action_dim) + x = to_torch(x) + sorted_idx = y.argsort(0, descending=True).squeeze() + sorted_values = x[sorted_idx] + actions = sorted_values[:, :, :action_dim] + actions = actions[0, 0].detach().cpu().numpy() + actions = de_normalize(actions, data, key='actions') + ## execute action in environment + next_observation, reward, terminal, _ = env.step(actions) + + ## update return + total_reward += reward + print(f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}") + # save observations for rendering + rollout.append(next_observation.copy()) + + obs = next_observation +except KeyboardInterrupt: + pass + +print(f"Total reward: {total_reward}") +render = MuJoCoRenderer(env) +show_sample(render, np.expand_dims(np.stack(rollout),axis=0)) \ No newline at end of file diff --git a/src/diffusers/schedulers/scheduling_value_function.py b/src/diffusers/schedulers/scheduling_value_function.py index fc56a2b18a9f..246d65758522 100644 --- a/src/diffusers/schedulers/scheduling_value_function.py +++ b/src/diffusers/schedulers/scheduling_value_function.py @@ -239,7 +239,7 @@ def step( # 2. compute predicted original sample from predicted noise also called # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf - pred_original_sample = sample + scale * model_output + pred_original_sample = model_output # 3. Clip "predicted x_0" if self.config.clip_sample: @@ -262,7 +262,7 @@ def step( ).to(model_output.device) variance = (self._get_variance(t, predicted_variance=predicted_variance) ** 0.5) * noise - pred_prev_sample = pred_prev_sample + variance + pred_prev_sample = pred_prev_sample + variance * noise if not return_dict: return (pred_prev_sample,) From a39652986661c962463e98d3da67d3132ed813e1 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Mon, 10 Oct 2022 15:41:35 -0400 Subject: [PATCH 08/32] larger batch size for planning --- examples/diffuser/run_diffuser_value_guided.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 41bc977a06f4..6083232a8773 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -23,7 +23,7 @@ DTYPE = torch.float # diffusion model settings -n_samples = 4 # number of trajectories planned via diffusion +n_samples = 64 # number of trajectories planned via diffusion horizon = 32 # length of sampled trajectories state_dim = env.observation_space.shape[0] action_dim = env.action_space.shape[0] From 713bd80a5244a0f6106cc796ceab788c7c60c4c3 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Tue, 11 Oct 2022 11:37:49 -0400 Subject: [PATCH 09/32] more tests --- .gitignore | 4 +- convert_model.py | 15 ++ examples/diffuser/run_diffuser.py | 134 +++++++++++------- .../diffuser/run_diffuser_value_guided.py | 47 ++++-- 4 files changed, 132 insertions(+), 68 deletions(-) create mode 100644 convert_model.py diff --git a/.gitignore b/.gitignore index cf8183463613..f066e7f84299 100644 --- a/.gitignore +++ b/.gitignore @@ -163,4 +163,6 @@ tags *.lock # DS_Store (MacOS) -.DS_Store \ No newline at end of file +.DS_Store +*.mp4 +hub/* \ No newline at end of file diff --git a/convert_model.py b/convert_model.py new file mode 100644 index 000000000000..d7504cb38828 --- /dev/null +++ b/convert_model.py @@ -0,0 +1,15 @@ + +import torch +from diffusers import DDPMScheduler, TemporalUNet, 
ValueFunction, ValueFunctionScheduler + + + +model = torch.load("/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-hor32.torch") +state_dict = model.state_dict() +hf_value_function = TemporalUNet(training_horizon=32, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) +mapping = dict((k, hfk) for k, hfk in zip(model.state_dict().keys(), hf_value_function.state_dict().keys())) +for k, v in mapping.items(): + state_dict[v] = state_dict.pop(k) +hf_value_function.load_state_dict(state_dict) + +torch.save(hf_value_function.state_dict(), "hub/hopper-medium-v2-unet/diffusion_pytorch_model.bin") diff --git a/examples/diffuser/run_diffuser.py b/examples/diffuser/run_diffuser.py index c672421c210c..97f38fd3f261 100644 --- a/examples/diffuser/run_diffuser.py +++ b/examples/diffuser/run_diffuser.py @@ -4,6 +4,7 @@ import tqdm import numpy as np import gym +import helpers env_name = "hopper-medium-expert-v2" env = gym.make(env_name) @@ -42,16 +43,11 @@ def to_torch(x_in, dtype=None, device=None): return x_in.to(device).type(dtype) return torch.tensor(x_in, dtype=dtype, device=device) -obs = env.reset() -obs_raw = obs -# normalize observations for forward passes -obs = normalize(obs, data, 'observations') from diffusers import DDPMScheduler, TemporalUNet # Two generators for different parts of the diffusion loop to work in colab -generator = torch.Generator(device='cuda') generator_cpu = torch.Generator(device='cpu') scheduler = DDPMScheduler(num_train_timesteps=100,beta_schedule="squaredcos_cap_v2") @@ -72,50 +68,84 @@ def reset_x0(x_in, cond, act_dim): ## add a batch dimension and repeat for multiple samples ## [ observation_dim ] --> [ n_samples x observation_dim ] -obs = obs[None].repeat(n_samples, axis=0) -conditions = { - 0: to_torch(obs, device=DEVICE) - } - -# constants for inference -batch_size = len(conditions[0]) -shape = (batch_size, horizon, state_dim+action_dim) - -# sample random initial noise vector -x1 = torch.randn(shape, device=DEVICE, generator=generator) - -# this model is conditioned from an initial state, so you will see this function -# multiple times to change the initial state of generated data to the state -# generated via env.reset() above or env.step() below -x = reset_x0(x1, conditions, action_dim) - -# convert a np observation to torch for model forward pass -x = to_torch(x) - -eta = 1.0 # noise factor for sampling reconstructed state - -# run the diffusion process -# for i in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps): -for i in tqdm.tqdm(scheduler.timesteps): - - # create batch of timesteps to pass into model - timesteps = torch.full((batch_size,), i, device=DEVICE, dtype=torch.long) - - # 1. generate prediction from model - with torch.no_grad(): - residual = network(x, timesteps).sample - - # 2. use the model prediction to reconstruct an observation (de-noise) - obs_reconstruct = scheduler.step(residual, i, x, predict_epsilon=predict_epsilon)["prev_sample"] - - # 3. [optional] add posterior noise to the sample - if eta > 0: - noise = torch.randn(obs_reconstruct.shape, generator=generator_cpu).to(obs_reconstruct.device) - posterior_variance = scheduler._get_variance(i) # * noise - # no noise when t == 0 - # NOTE: original implementation missing sqrt on posterior_variance - obs_reconstruct = obs_reconstruct + int(i>0) * (0.5 * posterior_variance) * eta* noise # MJ had as log var, exponentiated - - # 4. 
apply conditions to the trajectory - obs_reconstruct_postcond = reset_x0(obs_reconstruct, conditions, action_dim) - x = to_torch(obs_reconstruct_postcond) \ No newline at end of file +obs = env.reset() +total_reward = 0 +done = False +T = 300 +rollout = [obs.copy()] + +try: + for t in tqdm.tqdm(range(T)): + obs_raw = obs + + # normalize observations for forward passes + obs = normalize(obs, data, 'observations') + obs = obs[None].repeat(n_samples, axis=0) + conditions = { + 0: to_torch(obs, device=DEVICE) + } + + # constants for inference + batch_size = len(conditions[0]) + shape = (batch_size, horizon, state_dim+action_dim) + + # sample random initial noise vector + x1 = torch.randn(shape, device=DEVICE, generator=generator_cpu) + + # this model is conditioned from an initial state, so you will see this function + # multiple times to change the initial state of generated data to the state + # generated via env.reset() above or env.step() below + x = reset_x0(x1, conditions, action_dim) + + # convert a np observation to torch for model forward pass + x = to_torch(x) + + eta = 1.0 # noise factor for sampling reconstructed state + + # run the diffusion process + # for i in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps): + for i in tqdm.tqdm(scheduler.timesteps): + + # create batch of timesteps to pass into model + timesteps = torch.full((batch_size,), i, device=DEVICE, dtype=torch.long) + + # 1. generate prediction from model + with torch.no_grad(): + residual = network(x, timesteps).sample + + # 2. use the model prediction to reconstruct an observation (de-noise) + obs_reconstruct = scheduler.step(residual, i, x, predict_epsilon=predict_epsilon)["prev_sample"] + + # 3. [optional] add posterior noise to the sample + if eta > 0: + noise = torch.randn(obs_reconstruct.shape, generator=generator_cpu).to(obs_reconstruct.device) + posterior_variance = scheduler._get_variance(i) # * noise + # no noise when t == 0 + # NOTE: original implementation missing sqrt on posterior_variance + obs_reconstruct = obs_reconstruct + int(i>0) * (0.5 * posterior_variance) * eta* noise # MJ had as log var, exponentiated + + # 4. 
apply conditions to the trajectory + obs_reconstruct_postcond = reset_x0(obs_reconstruct, conditions, action_dim) + x = to_torch(obs_reconstruct_postcond) + plans = helpers.to_np(x[:,:,:action_dim]) + # select random plan + idx = np.random.randint(plans.shape[0]) + # select action at correct time + action = plans[idx, 0, :] + actions= de_normalize(action, data, 'actions') + ## execute action in environment + next_observation, reward, terminal, _ = env.step(action) + + ## update return + total_reward += reward + print(f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}") + + # save observations for rendering + rollout.append(next_observation.copy()) + obs = next_observation +except KeyboardInterrupt: + pass + +print(f"Total reward: {total_reward}") +render =helpers.MuJoCoRenderer(env) +helpers.show_sample(render, np.expand_dims(np.stack(rollout),axis=0)) \ No newline at end of file diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 6083232a8773..2fae766bcbff 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -16,6 +16,7 @@ env_name = "hopper-medium-expert-v2" env = gym.make(env_name) data = env.get_dataset() # dataset is only used for normalization in this colab +render = MuJoCoRenderer(env) # Cuda settings for colab # torch.cuda.get_device_name(0) @@ -23,11 +24,11 @@ DTYPE = torch.float # diffusion model settings -n_samples = 64 # number of trajectories planned via diffusion +n_samples = 4 # number of trajectories planned via diffusion horizon = 32 # length of sampled trajectories state_dim = env.observation_space.shape[0] action_dim = env.action_space.shape[0] -num_inference_steps = 100 # number of difusion steps +num_inference_steps = 20 # number of difusion steps def normalize(x_in, data, key): upper = np.max(data[key], axis=0) @@ -56,7 +57,7 @@ def to_torch(x_in, dtype=None, device=None): # generator = torch.Generator(device='cuda') generator_cpu = torch.Generator(device='cpu') -scheduler = ValueFunctionScheduler(num_train_timesteps=20,beta_schedule="squaredcos_cap_v2", clip_sample=False) +scheduler = ValueFunctionScheduler(num_train_timesteps=num_inference_steps,beta_schedule="squaredcos_cap_v2", clip_sample=False) # 3 different pretrained models are available for this task. # The horizion represents the length of trajectories used in training. 
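For reference, a minimal sketch of the min/max normalization round trip these scripts rely on when moving between dataset units and the [-1, 1] range the diffusion model operates in; the toy `data` dict below is only a stand-in for the real D4RL dataset returned by env.get_dataset().

import numpy as np

# toy stand-in for env.get_dataset(); only the per-feature min/max matters here
data = {"actions": np.random.uniform(-2.0, 2.0, size=(1000, 3))}

def normalize(x_in, data, key):
    upper = np.max(data[key], axis=0)
    lower = np.min(data[key], axis=0)
    return 2 * (x_in - lower) / (upper - lower) - 1

def de_normalize(x_in, data, key):
    upper = np.max(data[key], axis=0)
    lower = np.min(data[key], axis=0)
    return lower + (upper - lower) * (1 + x_in) / 2

x = data["actions"][:5]
x_norm = normalize(x, data, "actions")                         # mapped into [-1, 1]
assert np.allclose(de_normalize(x_norm, data, "actions"), x)   # exact round trip inside the data range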
@@ -77,13 +78,24 @@ def reset_x0(x_in, cond, act_dim): n_guide_steps = 2 scale_grad_by_std = True scale = 0.001 +eta = 1.0 # noise factor for sampling reconstructed state + ## add a batch dimension and repeat for multiple samples ## [ observation_dim ] --> [ n_samples x observation_dim ] obs = env.reset() +# start_idx = 340 +# obs = data['observations'][start_idx] +# qpos = data['infos/qpos'][start_idx] +# qvel = data['infos/qvel'][start_idx] + +# env.set_state(qpos, qvel) total_reward = 0 done = False T = 300 rollout = [obs.copy()] +trajectories = [] +y_maxes = [] +t_grad_cutoff = 0 try: for t in tqdm.tqdm(range(T)): obs_raw = obs @@ -112,7 +124,6 @@ def reset_x0(x_in, cond, act_dim): # convert a np observation to torch for model forward pass x = to_torch(x) - eta = 1.0 # noise factor for sampling reconstructed state # run the diffusion process # for i in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps): @@ -130,12 +141,11 @@ def reset_x0(x_in, cond, act_dim): if scale_grad_by_std: posterior_variance = scheduler._get_variance(i) grad = posterior_variance * 0.5 * grad - grad[i < 4] = 0 + grad[timesteps < t_grad_cutoff] = 0 x = x.detach() x = x + scale * grad x = reset_x0(x, conditions, action_dim) prev_x = unet(x, timesteps).sample - # TODO: This should really be a TemporalUnet that predicts previos state given x x = scheduler.step(prev_x, i, x)["prev_sample"] x = reset_x0(x, conditions, action_dim) if clip_denoised: @@ -143,20 +153,25 @@ def reset_x0(x_in, cond, act_dim): # 2. use the model prediction to reconstruct an observation (de-noise) - # # 3. [optional] add posterior noise to the sample - # if eta > 0: - # noise = torch.randn(obs_reconstruct.shape, generator=generator_cpu).to(obs_reconstruct.device) - # posterior_variance = scheduler._get_variance(i) # * noise - # # no noise when t == 0 - # # NOTE: original implementation missing sqrt on posterior_variance - # obs_reconstruct = obs_reconstruct + int(i>0) * (0.5 * posterior_variance) * eta* noise # MJ had as log var, exponentiated + # 3. [optional] add posterior noise to the sample + if eta > 0: + noise = torch.randn(x.shape, generator=generator_cpu).to(x.device) + posterior_variance = scheduler._get_variance(i) # * noise + # no noise when t == 0 + # NOTE: original implementation missing sqrt on posterior_variance + x = x + int(i>0) * (0.5 * posterior_variance) * eta * noise # MJ had as log var, exponentiated # 4. 
apply conditions to the trajectory - # obs_reconstruct_postcond = reset_x0(obs_reconstruct, conditions, action_dim) + x = reset_x0(x, conditions, action_dim) x = to_torch(x) sorted_idx = y.argsort(0, descending=True).squeeze() + y_maxes.append(y[sorted_idx[0]]) sorted_values = x[sorted_idx] actions = sorted_values[:, :, :action_dim] + if t % 10 == 0: + trajectory = sorted_values[:, :, action_dim:][0].unsqueeze(0).detach().numpy() + trajectory = de_normalize(trajectory, data, 'observations') + trajectories.append(trajectory) actions = actions[0, 0].detach().cpu().numpy() actions = de_normalize(actions, data, key='actions') ## execute action in environment @@ -173,5 +188,7 @@ def reset_x0(x_in, cond, act_dim): pass print(f"Total reward: {total_reward}") -render = MuJoCoRenderer(env) +for i, trajectory in enumerate(trajectories): + show_sample(render, trajectory, f"trajectory_{i}.mp4") + show_sample(render, np.expand_dims(np.stack(rollout),axis=0)) \ No newline at end of file From d9384ffeb3805190e54b74dc0dc5f17ac0d7d64f Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Tue, 11 Oct 2022 15:24:53 -0400 Subject: [PATCH 10/32] merge unet1d changes --- convert_model.py | 47 +++++++++++++++---- .../diffuser/run_diffuser_value_guided.py | 12 ++--- src/diffusers/__init__.py | 2 +- src/diffusers/models/__init__.py | 2 +- src/diffusers/models/unet_rl.py | 41 +++++----------- 5 files changed, 57 insertions(+), 47 deletions(-) diff --git a/convert_model.py b/convert_model.py index d7504cb38828..b44b4d390725 100644 --- a/convert_model.py +++ b/convert_model.py @@ -1,15 +1,44 @@ import torch -from diffusers import DDPMScheduler, TemporalUNet, ValueFunction, ValueFunctionScheduler +from diffusers import DDPMScheduler, UNet1DModel, ValueFunction, ValueFunctionScheduler +import os +import json +os.makedirs("hub/hopper-medium-v2/unet", exist_ok=True) +os.makedirs("hub/hopper-medium-v2/value_function", exist_ok=True) +def unet(): + model = torch.load("/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-hor32.torch") + state_dict = model.state_dict() + hf_value_function = UNet1DModel(dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14) + mapping = dict((k, hfk) for k, hfk in zip(model.state_dict().keys(), hf_value_function.state_dict().keys())) + for k, v in mapping.items(): + state_dict[v] = state_dict.pop(k) + hf_value_function.load_state_dict(state_dict) + torch.save(hf_value_function.state_dict(), "hub/hopper-medium-v2/unet/diffusion_pytorch_model.bin") + config = dict(dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14) + with open("hub/hopper-medium-v2/unet/config.json", "w") as f: + json.dump(config, f) -model = torch.load("/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-hor32.torch") -state_dict = model.state_dict() -hf_value_function = TemporalUNet(training_horizon=32, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) -mapping = dict((k, hfk) for k, hfk in zip(model.state_dict().keys(), hf_value_function.state_dict().keys())) -for k, v in mapping.items(): - state_dict[v] = state_dict.pop(k) -hf_value_function.load_state_dict(state_dict) +def value_function(): + model = torch.load("/Users/bglickenhaus/Documents/diffuser/value_function-hopper-hor32.torch") + state_dict = model.state_dict() + hf_value_function = ValueFunction(dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14) + print(f"length of state dict: {len(state_dict.keys())}") + print(f"length of value function dict: {len(hf_value_function.state_dict().keys())}") -torch.save(hf_value_function.state_dict(), 
"hub/hopper-medium-v2-unet/diffusion_pytorch_model.bin") + mapping = dict((k, hfk) for k, hfk in zip(model.state_dict().keys(), hf_value_function.state_dict().keys())) + for k, v in mapping.items(): + state_dict[v] = state_dict.pop(k) + + hf_value_function.load_state_dict(state_dict) + + torch.save(hf_value_function.state_dict(), "hub/hopper-medium-v2/value_function/diffusion_pytorch_model.bin") + config = dict(dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14) + with open("hub/hopper-medium-v2/value_function/config.json", "w") as f: + json.dump(config, f) + + +if __name__ == "__main__": + unet() + value_function() \ No newline at end of file diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 2fae766bcbff..ccbcb9204cfd 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -4,7 +4,7 @@ import tqdm import numpy as np import gym -from diffusers import DDPMScheduler, TemporalUNet, ValueFunction, ValueFunctionScheduler +from diffusers import DDPMScheduler, UNet1DModel, ValueFunction, ValueFunctionScheduler from helpers import MuJoCoRenderer, show_sample @@ -24,8 +24,8 @@ DTYPE = torch.float # diffusion model settings -n_samples = 4 # number of trajectories planned via diffusion -horizon = 32 # length of sampled trajectories +n_samples = 64 # number of trajectories planned via diffusion +horizon = 64 # length of sampled trajectories state_dim = env.observation_space.shape[0] action_dim = env.action_space.shape[0] num_inference_steps = 20 # number of difusion steps @@ -64,7 +64,7 @@ def to_torch(x_in, dtype=None, device=None): # network = ValueFunction(training_horizon=horizon, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) network = ValueFunction.from_pretrained("bglick13/hopper-medium-expert-v2-value-function-hor32").to(device=DEVICE) -unet = TemporalUNet.from_pretrained("bglick13/hopper-medium-expert-v2-unet-hor32").to(device=DEVICE) +unet = UNet1DModel.from_pretrained("bglick13/hopper-medium-expert-v2-unet-hor32").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) def reset_x0(x_in, cond, act_dim): @@ -78,7 +78,7 @@ def reset_x0(x_in, cond, act_dim): n_guide_steps = 2 scale_grad_by_std = True scale = 0.001 -eta = 1.0 # noise factor for sampling reconstructed state +eta = 0.0 # noise factor for sampling reconstructed state ## add a batch dimension and repeat for multiple samples ## [ observation_dim ] --> [ n_samples x observation_dim ] @@ -95,7 +95,7 @@ def reset_x0(x_in, cond, act_dim): rollout = [obs.copy()] trajectories = [] y_maxes = [] -t_grad_cutoff = 0 +t_grad_cutoff = 4 try: for t in tqdm.tqdm(range(T)): obs_raw = obs diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 528eb396fbb1..c21ba3c7c3c2 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -18,7 +18,7 @@ if is_torch_available(): from .modeling_utils import ModelMixin - from .models import AutoencoderKL, UNet1DModel, UNet2DConditionModel, UNet2DModel, VQModel + from .models import AutoencoderKL, UNet1DModel, UNet2DConditionModel, UNet2DModel, VQModel, ValueFunction from .optimization import ( get_constant_schedule, get_constant_schedule_with_warmup, diff --git a/src/diffusers/models/__init__.py b/src/diffusers/models/__init__.py index dc0946cf4d54..f7f2b77ecdf3 100644 --- 
a/src/diffusers/models/__init__.py +++ b/src/diffusers/models/__init__.py @@ -18,7 +18,7 @@ if is_torch_available(): from .unet_2d import UNet2DModel from .unet_2d_condition import UNet2DConditionModel - from .unet_rl import UNet1DModel + from .unet_rl import UNet1DModel, ValueFunction from .vae import AutoencoderKL, VQModel if is_flax_available(): diff --git a/src/diffusers/models/unet_rl.py b/src/diffusers/models/unet_rl.py index 73b81870b670..129d18f330a8 100644 --- a/src/diffusers/models/unet_rl.py +++ b/src/diffusers/models/unet_rl.py @@ -6,7 +6,7 @@ import torch.nn as nn from diffusers.models.resnet import ResidualTemporalBlock -from diffusers.models.unet_blocks import DownResnetBlock1D, UpResnetBlock1D +from diffusers.models.unet_blocks import DownResnetBlock1D, UpResnetBlock1D, Downsample1D from ..configuration_utils import ConfigMixin, register_to_config from ..modeling_utils import ModelMixin @@ -152,22 +152,13 @@ class ValueFunction(ModelMixin, ConfigMixin): @register_to_config def __init__( self, - training_horizon=128, transition_dim=14, - cond_dim=3, - predict_epsilon=False, - clip_denoised=True, dim=32, dim_mults=(1, 4, 8), - out_dim=1, ): super().__init__() self.transition_dim = transition_dim - self.cond_dim = cond_dim - self.predict_epsilon = predict_epsilon - self.clip_denoised = clip_denoised - self.time_proj = Timesteps(num_channels=dim, flip_sin_to_cos=False, downscale_freq_shift=1) self.time_mlp = TimestepEmbedding(channel=dim, time_embed_dim=4 * dim, act_fn="mish", out_dim=dim) @@ -181,17 +172,11 @@ def __init__( is_last = ind >= (num_resolutions - 1) self.blocks.append( - nn.ModuleList( - [ - ResidualTemporalBlock(dim_in, dim_out, embed_dim=dim), - ResidualTemporalBlock(dim_out, dim_out, embed_dim=dim), - Downsample1D(dim_out, use_conv=True), - ] + DownResnetBlock1D( + in_channels=dim_in, out_channels=dim_out, temb_channels=dim, add_downsample=True ) ) - if not is_last: - training_horizon = training_horizon // 2 mid_dim = dims[-1] mid_dim_2 = mid_dim // 2 @@ -199,17 +184,15 @@ def __init__( ## self.mid_block1 = ResidualTemporalBlock(mid_dim, mid_dim_2, embed_dim=dim) self.mid_down1 = Downsample1D(mid_dim_2, use_conv=True) - training_horizon = training_horizon // 2 ## self.mid_block2 = ResidualTemporalBlock(mid_dim_2, mid_dim_3, embed_dim=dim) self.mid_down2 = Downsample1D(mid_dim_3, use_conv=True) - training_horizon = training_horizon // 2 ## - fc_dim = mid_dim_3 * max(training_horizon, 1) + fc_dim = mid_dim_3 self.final_block = nn.ModuleList([ nn.Linear(fc_dim + dim, fc_dim // 2), nn.Mish(), - nn.Linear(fc_dim // 2, out_dim),] + nn.Linear(fc_dim // 2, 1),] ) def forward( @@ -217,7 +200,7 @@ def forward( sample: torch.FloatTensor, timestep: Union[torch.Tensor, float, int], return_dict: bool = True, - ) -> Union[TemporalUNetOutput, Tuple]: + ) -> Union[UNet1DOutput, Tuple]: """r Args: sample (`torch.FloatTensor`): (batch, horizon, obs_dimension + action_dimension) noisy inputs tensor @@ -240,14 +223,12 @@ def forward( t = self.time_proj(timesteps) t = self.time_mlp(t) - h = [] + down_block_res_samples = [] # 2. down - for resnet, resnet2, downsample in self.blocks: - sample = resnet(sample, t) - sample = resnet2(sample, t) - h.append(sample) - sample = downsample(sample) + for downsample_block in self.blocks: + sample, res_samples = downsample_block(hidden_states=sample, temb=t) + down_block_res_samples.append(res_samples[0]) # 3. 
mid sample = self.mid_block1(sample, t) @@ -263,4 +244,4 @@ def forward( if not return_dict: return (sample,) - return TemporalUNetOutput(sample=sample) + return UNet1DOutput(sample=sample) From 52e26680dfc888a31ea82dca83e18035db4e3883 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Tue, 11 Oct 2022 17:36:12 -0400 Subject: [PATCH 11/32] wandb for debugging, use newer models --- examples/diffuser/helpers.py | 1 + .../diffuser/run_diffuser_value_guided.py | 61 +++++++++---------- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/examples/diffuser/helpers.py b/examples/diffuser/helpers.py index ef853d3a6039..947c4d6cb216 100644 --- a/examples/diffuser/helpers.py +++ b/examples/diffuser/helpers.py @@ -40,6 +40,7 @@ def show_sample(renderer, observations, filename='sample.mp4', savebase='videos' images = np.concatenate(images, axis=2) media.write_video(savepath, images, fps=60) media.show_video(images, codec='h264', fps=60) + return images # Code adapted from Michael Janner # source: https://github.com/jannerm/diffuser/blob/main/diffuser/utils/rendering.py diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index ccbcb9204cfd..cdbc3fc84cb4 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -6,7 +6,19 @@ import gym from diffusers import DDPMScheduler, UNet1DModel, ValueFunction, ValueFunctionScheduler from helpers import MuJoCoRenderer, show_sample - +import wandb +wandb.init(project="diffusers-value-guided-rl") + +config = dict( + n_samples=64, + horizon=32, + num_inference_steps=20, + n_guide_steps=2, + scale_grad_by_std=True, + scale=0.001, + eta=0.0, + t_grad_cutoff=4 +) # model = torch.load("../diffuser/test.torch") # hf_value_function = ValueFunction(training_horizon=32, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) @@ -24,11 +36,8 @@ DTYPE = torch.float # diffusion model settings -n_samples = 64 # number of trajectories planned via diffusion -horizon = 64 # length of sampled trajectories state_dim = env.observation_space.shape[0] action_dim = env.action_space.shape[0] -num_inference_steps = 20 # number of difusion steps def normalize(x_in, data, key): upper = np.max(data[key], axis=0) @@ -57,29 +66,20 @@ def to_torch(x_in, dtype=None, device=None): # generator = torch.Generator(device='cuda') generator_cpu = torch.Generator(device='cpu') -scheduler = ValueFunctionScheduler(num_train_timesteps=num_inference_steps,beta_schedule="squaredcos_cap_v2", clip_sample=False) +scheduler = ValueFunctionScheduler(num_train_timesteps=config['num_inference_steps'],beta_schedule="squaredcos_cap_v2", clip_sample=False) # 3 different pretrained models are available for this task. # The horizion represents the length of trajectories used in training. 
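The guide-step hyperparameters in the config above (n_guide_steps, scale, scale_grad_by_std, t_grad_cutoff) drive the value-guided update that the sampling loop below applies: before each denoising step the batch of trajectories is nudged along the gradient of the value model's predicted return. A self-contained sketch of that single update, with a toy differentiable function standing in for the pretrained ValueFunction and a scalar standing in for the scheduler's posterior variance:

import torch

def guide_step(x, value_fn, scale=0.001, posterior_variance=None):
    # x: (n_samples, horizon, transition_dim) batch of noisy trajectories
    x = x.detach().requires_grad_(True)
    y = value_fn(x)                               # (n_samples,) predicted returns
    grad = torch.autograd.grad(y.sum(), x)[0]     # d(return) / d(trajectory)
    if posterior_variance is not None:            # mirrors scale_grad_by_std
        grad = 0.5 * posterior_variance * grad
    return (x + scale * grad).detach(), y.detach()

x = torch.randn(4, 32, 14)
toy_value = lambda traj: -traj.pow(2).mean(dim=(1, 2))   # stand-in for ValueFunction(...).sample
x, y = guide_step(x, toy_value, scale=0.001, posterior_variance=0.5)

The full loop additionally zeroes the gradient once the timestep drops below t_grad_cutoff, so the last few denoising steps are left unguided.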
# network = ValueFunction(training_horizon=horizon, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) network = ValueFunction.from_pretrained("bglick13/hopper-medium-expert-v2-value-function-hor32").to(device=DEVICE) -unet = UNet1DModel.from_pretrained("bglick13/hopper-medium-expert-v2-unet-hor32").to(device=DEVICE) +unet = UNet1DModel.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) def reset_x0(x_in, cond, act_dim): for key, val in cond.items(): x_in[:, key, act_dim:] = val.clone() return x_in - -# network specific constants for inference -clip_denoised = False -predict_epsilon = False -n_guide_steps = 2 -scale_grad_by_std = True -scale = 0.001 -eta = 0.0 # noise factor for sampling reconstructed state - ## add a batch dimension and repeat for multiple samples ## [ observation_dim ] --> [ n_samples x observation_dim ] obs = env.reset() @@ -91,11 +91,10 @@ def reset_x0(x_in, cond, act_dim): # env.set_state(qpos, qvel) total_reward = 0 done = False -T = 300 +T = 200 rollout = [obs.copy()] trajectories = [] y_maxes = [] -t_grad_cutoff = 4 try: for t in tqdm.tqdm(range(T)): obs_raw = obs @@ -103,7 +102,7 @@ def reset_x0(x_in, cond, act_dim): # normalize observations for forward passes obs = normalize(obs, data, 'observations') - obs = obs[None].repeat(n_samples, axis=0) + obs = obs[None].repeat(config['n_samples'], axis=0) conditions = { 0: to_torch(obs, device=DEVICE) } @@ -111,7 +110,7 @@ def reset_x0(x_in, cond, act_dim): # 2. Call the diffusion model # constants for inference batch_size = len(conditions[0]) - shape = (batch_size, horizon, state_dim+action_dim) + shape = (batch_size, config['horizon'], state_dim+action_dim) # sample random initial noise vector x1 = torch.randn(shape, device=DEVICE, generator=generator_cpu) @@ -133,39 +132,37 @@ def reset_x0(x_in, cond, act_dim): timesteps = torch.full((batch_size,), i, device=DEVICE, dtype=torch.long) # 3. call the sample function - for _ in range(n_guide_steps): + for _ in range(config['n_guide_steps']): with torch.enable_grad(): x.requires_grad_() y = network(x, timesteps).sample grad = torch.autograd.grad([y.sum()], [x])[0] - if scale_grad_by_std: + if config['scale_grad_by_std']: posterior_variance = scheduler._get_variance(i) grad = posterior_variance * 0.5 * grad - grad[timesteps < t_grad_cutoff] = 0 + grad[timesteps < config['t_grad_cutoff']] = 0 x = x.detach() - x = x + scale * grad + x = x + config['scale'] * grad x = reset_x0(x, conditions, action_dim) + y = network(x, timesteps).sample prev_x = unet(x, timesteps).sample x = scheduler.step(prev_x, i, x)["prev_sample"] x = reset_x0(x, conditions, action_dim) - if clip_denoised: - x.clamp_(-1., 1.) - # 2. use the model prediction to reconstruct an observation (de-noise) # 3. [optional] add posterior noise to the sample - if eta > 0: + if config['eta'] > 0: noise = torch.randn(x.shape, generator=generator_cpu).to(x.device) posterior_variance = scheduler._get_variance(i) # * noise # no noise when t == 0 # NOTE: original implementation missing sqrt on posterior_variance - x = x + int(i>0) * (0.5 * posterior_variance) * eta * noise # MJ had as log var, exponentiated + x = x + int(i>0) * (0.5 * posterior_variance) * config['eta'] * noise # MJ had as log var, exponentiated # 4. 
apply conditions to the trajectory x = reset_x0(x, conditions, action_dim) x = to_torch(x) sorted_idx = y.argsort(0, descending=True).squeeze() - y_maxes.append(y[sorted_idx[0]]) + y_maxes.append(y[sorted_idx[0]].detach().cpu().numpy()) sorted_values = x[sorted_idx] actions = sorted_values[:, :, :action_dim] if t % 10 == 0: @@ -179,6 +176,7 @@ def reset_x0(x_in, cond, act_dim): ## update return total_reward += reward + wandb.log({"total_reward": total_reward, "reward": reward, "y_max": y_maxes[-1], "diff_from_expert_reward": reward - data['rewards'][t]}) print(f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}") # save observations for rendering rollout.append(next_observation.copy()) @@ -188,7 +186,6 @@ def reset_x0(x_in, cond, act_dim): pass print(f"Total reward: {total_reward}") -for i, trajectory in enumerate(trajectories): - show_sample(render, trajectory, f"trajectory_{i}.mp4") -show_sample(render, np.expand_dims(np.stack(rollout),axis=0)) \ No newline at end of file +images = show_sample(render, np.expand_dims(np.stack(rollout),axis=0)) +wandb.log({"rollout": wandb.Video('videos/sample.mp4', fps=60, format='mp4')}) \ No newline at end of file From 75fe8b463a7d42dda78462ee3efba3220c1009c0 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Tue, 11 Oct 2022 18:01:39 -0400 Subject: [PATCH 12/32] success! --- examples/diffuser/run_diffuser_value_guided.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index cdbc3fc84cb4..c0ae2e1ea4b9 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -10,14 +10,14 @@ wandb.init(project="diffusers-value-guided-rl") config = dict( - n_samples=64, + n_samples=4, horizon=32, - num_inference_steps=20, - n_guide_steps=2, + num_inference_steps=100, + n_guide_steps=0, scale_grad_by_std=True, scale=0.001, eta=0.0, - t_grad_cutoff=4 + t_grad_cutoff=0 ) # model = torch.load("../diffuser/test.torch") @@ -73,7 +73,7 @@ def to_torch(x_in, dtype=None, device=None): # network = ValueFunction(training_horizon=horizon, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) network = ValueFunction.from_pretrained("bglick13/hopper-medium-expert-v2-value-function-hor32").to(device=DEVICE) -unet = UNet1DModel.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) +unet = UNet1DModel.from_pretrained("bglick13/hopper-medium-expert-v2-unet-hor32").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) def reset_x0(x_in, cond, act_dim): @@ -91,7 +91,7 @@ def reset_x0(x_in, cond, act_dim): # env.set_state(qpos, qvel) total_reward = 0 done = False -T = 200 +T = 400 rollout = [obs.copy()] trajectories = [] y_maxes = [] From c7fe1dc4768fce7cee311fcaef31f8788367bcc6 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Tue, 11 Oct 2022 22:59:17 -0400 Subject: [PATCH 13/32] turns out we just need more diffusion steps --- examples/diffuser/helpers.py | 68 +++++++++++++ .../diffuser/run_diffuser_value_guided.py | 97 ++++--------------- 2 files changed, 86 insertions(+), 79 deletions(-) diff --git a/examples/diffuser/helpers.py b/examples/diffuser/helpers.py index 947c4d6cb216..7d103ee7eba8 100644 --- a/examples/diffuser/helpers.py +++ b/examples/diffuser/helpers.py @@ -4,6 +4,74 @@ import torch import gym import 
warnings +import tqdm + +DEVICE = 'cpu' + +DTYPE = torch.float +def normalize(x_in, data, key): + upper = np.max(data[key], axis=0) + lower = np.min(data[key], axis=0) + x_out = 2*(x_in - lower)/(upper-lower) - 1 + return x_out + +def de_normalize(x_in, data, key): + upper = np.max(data[key], axis=0) + lower = np.min(data[key], axis=0) + x_out = lower + (upper - lower)*(1 + x_in) /2 + return x_out + +def to_torch(x_in, dtype=None, device=None): + dtype = dtype or DTYPE + device = device or DEVICE + if type(x_in) is dict: + return {k: to_torch(v, dtype, device) for k, v in x_in.items()} + elif torch.is_tensor(x_in): + return x_in.to(device).type(dtype) + return torch.tensor(x_in, dtype=dtype, device=device) + + +def reset_x0(x_in, cond, act_dim): + for key, val in cond.items(): + x_in[:, key, act_dim:] = val.clone() + return x_in + +def run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim, config): + for i in tqdm.tqdm(scheduler.timesteps): + + # create batch of timesteps to pass into model + timesteps = torch.full((config['n_samples'],), i, device=DEVICE, dtype=torch.long) + + # 3. call the sample function + for _ in range(config['n_guide_steps']): + with torch.enable_grad(): + x.requires_grad_() + y = network(x, timesteps).sample + grad = torch.autograd.grad([y.sum()], [x])[0] + if config['scale_grad_by_std']: + posterior_variance = scheduler._get_variance(i) + grad = posterior_variance * 0.5 * grad + grad[timesteps < config['t_grad_cutoff']] = 0 + x = x.detach() + x = x + config['scale'] * grad + x = reset_x0(x, conditions, action_dim) + y = network(x, timesteps).sample + prev_x = unet(x, timesteps).sample + x = scheduler.step(prev_x, i, x)["prev_sample"] + + # 3. [optional] add posterior noise to the sample + if config['eta'] > 0: + noise = torch.randn(x.shape, generator=generator).to(x.device) + posterior_variance = scheduler._get_variance(i) # * noise + # no noise when t == 0 + # NOTE: original implementation missing sqrt on posterior_variance + x = x + int(i>0) * (0.5 * posterior_variance) * config['eta'] * noise # MJ had as log var, exponentiated + + # 4. 
apply conditions to the trajectory + x = reset_x0(x, conditions, action_dim) + x = to_torch(x) + return x, y + def to_np(x_in): if torch.is_tensor(x_in): x_in = x_in.detach().cpu().numpy() diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index c0ae2e1ea4b9..41d2632c2202 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -6,18 +6,19 @@ import gym from diffusers import DDPMScheduler, UNet1DModel, ValueFunction, ValueFunctionScheduler from helpers import MuJoCoRenderer, show_sample +import helpers import wandb wandb.init(project="diffusers-value-guided-rl") config = dict( n_samples=4, horizon=32, - num_inference_steps=100, + num_inference_steps=200, n_guide_steps=0, scale_grad_by_std=True, scale=0.001, eta=0.0, - t_grad_cutoff=0 + t_grad_cutoff=4 ) # model = torch.load("../diffuser/test.torch") @@ -39,26 +40,6 @@ state_dim = env.observation_space.shape[0] action_dim = env.action_space.shape[0] -def normalize(x_in, data, key): - upper = np.max(data[key], axis=0) - lower = np.min(data[key], axis=0) - x_out = 2*(x_in - lower)/(upper-lower) - 1 - return x_out - -def de_normalize(x_in, data, key): - upper = np.max(data[key], axis=0) - lower = np.min(data[key], axis=0) - x_out = lower + (upper - lower)*(1 + x_in) /2 - return x_out - -def to_torch(x_in, dtype=None, device=None): - dtype = dtype or DTYPE - device = device or DEVICE - if type(x_in) is dict: - return {k: to_torch(v, dtype, device) for k, v in x_in.items()} - elif torch.is_tensor(x_in): - return x_in.to(device).type(dtype) - return torch.tensor(x_in, dtype=dtype, device=device) @@ -76,19 +57,10 @@ def to_torch(x_in, dtype=None, device=None): unet = UNet1DModel.from_pretrained("bglick13/hopper-medium-expert-v2-unet-hor32").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) -def reset_x0(x_in, cond, act_dim): - for key, val in cond.items(): - x_in[:, key, act_dim:] = val.clone() - return x_in + ## add a batch dimension and repeat for multiple samples ## [ observation_dim ] --> [ n_samples x observation_dim ] obs = env.reset() -# start_idx = 340 -# obs = data['observations'][start_idx] -# qpos = data['infos/qpos'][start_idx] -# qvel = data['infos/qvel'][start_idx] - -# env.set_state(qpos, qvel) total_reward = 0 done = False T = 400 @@ -100,11 +72,11 @@ def reset_x0(x_in, cond, act_dim): obs_raw = obs # 1. Call the policy # normalize observations for forward passes - obs = normalize(obs, data, 'observations') + obs = helpers.normalize(obs, data, 'observations') obs = obs[None].repeat(config['n_samples'], axis=0) conditions = { - 0: to_torch(obs, device=DEVICE) + 0: helpers.to_torch(obs, device=DEVICE) } # 2. 
Call the diffusion model @@ -118,61 +90,28 @@ def reset_x0(x_in, cond, act_dim): # this model is conditioned from an initial state, so you will see this function # multiple times to change the initial state of generated data to the state # generated via env.reset() above or env.step() below - x = reset_x0(x1, conditions, action_dim) + x = helpers.reset_x0(x1, conditions, action_dim) # convert a np observation to torch for model forward pass - x = to_torch(x) - - - # run the diffusion process - # for i in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps): - for i in tqdm.tqdm(scheduler.timesteps): - - # create batch of timesteps to pass into model - timesteps = torch.full((batch_size,), i, device=DEVICE, dtype=torch.long) - - # 3. call the sample function - for _ in range(config['n_guide_steps']): - with torch.enable_grad(): - x.requires_grad_() - y = network(x, timesteps).sample - grad = torch.autograd.grad([y.sum()], [x])[0] - if config['scale_grad_by_std']: - posterior_variance = scheduler._get_variance(i) - grad = posterior_variance * 0.5 * grad - grad[timesteps < config['t_grad_cutoff']] = 0 - x = x.detach() - x = x + config['scale'] * grad - x = reset_x0(x, conditions, action_dim) - y = network(x, timesteps).sample - prev_x = unet(x, timesteps).sample - x = scheduler.step(prev_x, i, x)["prev_sample"] - x = reset_x0(x, conditions, action_dim) - - - # 3. [optional] add posterior noise to the sample - if config['eta'] > 0: - noise = torch.randn(x.shape, generator=generator_cpu).to(x.device) - posterior_variance = scheduler._get_variance(i) # * noise - # no noise when t == 0 - # NOTE: original implementation missing sqrt on posterior_variance - x = x + int(i>0) * (0.5 * posterior_variance) * config['eta'] * noise # MJ had as log var, exponentiated - - # 4. 
apply conditions to the trajectory - x = reset_x0(x, conditions, action_dim) - x = to_torch(x) + x = helpers.to_torch(x) + x, y = helpers.run_diffusion(x, scheduler, generator_cpu, network, unet, conditions, action_dim, config) sorted_idx = y.argsort(0, descending=True).squeeze() y_maxes.append(y[sorted_idx[0]].detach().cpu().numpy()) sorted_values = x[sorted_idx] actions = sorted_values[:, :, :action_dim] if t % 10 == 0: trajectory = sorted_values[:, :, action_dim:][0].unsqueeze(0).detach().numpy() - trajectory = de_normalize(trajectory, data, 'observations') + trajectory = helpers.de_normalize(trajectory, data, 'observations') trajectories.append(trajectory) - actions = actions[0, 0].detach().cpu().numpy() - actions = de_normalize(actions, data, key='actions') + + actions = actions.detach().cpu().numpy() + denorm_actions = helpers.de_normalize(actions, data, key='actions') + # denorm_actions = denorm_actions[np.random.randint(config['n_samples']), 0] + denorm_actions = denorm_actions[0, 0] + + ## execute action in environment - next_observation, reward, terminal, _ = env.step(actions) + next_observation, reward, terminal, _ = env.step(denorm_actions) ## update return total_reward += reward From a6871b1bfd82f000ac9dd369b5ba1b185a5b7021 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Wed, 12 Oct 2022 16:47:19 -0400 Subject: [PATCH 14/32] run on modal --- convert_model.py | 6 +- examples/diffuser/helpers.py | 14 +- .../diffuser/run_diffuser_value_guided.py | 261 ++++++++++-------- 3 files changed, 160 insertions(+), 121 deletions(-) diff --git a/convert_model.py b/convert_model.py index b44b4d390725..4323854faadd 100644 --- a/convert_model.py +++ b/convert_model.py @@ -7,16 +7,16 @@ os.makedirs("hub/hopper-medium-v2/value_function", exist_ok=True) def unet(): - model = torch.load("/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-hor32.torch") + model = torch.load("/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-hor128.torch") state_dict = model.state_dict() - hf_value_function = UNet1DModel(dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14) + hf_value_function = UNet1DModel(dim=32, dim_mults=(1, 4, 8), transition_dim=14) mapping = dict((k, hfk) for k, hfk in zip(model.state_dict().keys(), hf_value_function.state_dict().keys())) for k, v in mapping.items(): state_dict[v] = state_dict.pop(k) hf_value_function.load_state_dict(state_dict) torch.save(hf_value_function.state_dict(), "hub/hopper-medium-v2/unet/diffusion_pytorch_model.bin") - config = dict(dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14) + config = dict(dim=32, dim_mults=(1, 4, 8), transition_dim=14) with open("hub/hopper-medium-v2/unet/config.json", "w") as f: json.dump(config, f) diff --git a/examples/diffuser/helpers.py b/examples/diffuser/helpers.py index 7d103ee7eba8..d8dc50e77388 100644 --- a/examples/diffuser/helpers.py +++ b/examples/diffuser/helpers.py @@ -6,7 +6,6 @@ import warnings import tqdm -DEVICE = 'cpu' DTYPE = torch.float def normalize(x_in, data, key): @@ -21,9 +20,9 @@ def de_normalize(x_in, data, key): x_out = lower + (upper - lower)*(1 + x_in) /2 return x_out -def to_torch(x_in, dtype=None, device=None): +def to_torch(x_in, dtype=None, device='cuda'): dtype = dtype or DTYPE - device = device or DEVICE + device = device if type(x_in) is dict: return {k: to_torch(v, dtype, device) for k, v in x_in.items()} elif torch.is_tensor(x_in): @@ -37,11 +36,11 @@ def reset_x0(x_in, cond, act_dim): return x_in def run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim, 
config): + y = None for i in tqdm.tqdm(scheduler.timesteps): # create batch of timesteps to pass into model - timesteps = torch.full((config['n_samples'],), i, device=DEVICE, dtype=torch.long) - + timesteps = torch.full((config['n_samples'],), i, device=config['device'], dtype=torch.long) # 3. call the sample function for _ in range(config['n_guide_steps']): with torch.enable_grad(): @@ -55,8 +54,8 @@ def run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim x = x.detach() x = x + config['scale'] * grad x = reset_x0(x, conditions, action_dim) - y = network(x, timesteps).sample - prev_x = unet(x, timesteps).sample + with torch.no_grad(): + prev_x = unet(x, timesteps).sample x = scheduler.step(prev_x, i, x)["prev_sample"] # 3. [optional] add posterior noise to the sample @@ -70,6 +69,7 @@ def run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim # 4. apply conditions to the trajectory x = reset_x0(x, conditions, action_dim) x = to_torch(x) + # y = network(x, timesteps).sample return x, y def to_np(x_in): diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 41d2632c2202..535b647a64c9 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -8,123 +8,162 @@ from helpers import MuJoCoRenderer, show_sample import helpers import wandb -wandb.init(project="diffusers-value-guided-rl") +import modal +import os + +stub = modal.Stub("diffusers-value-guided") +image = modal.Image.debian_slim().apt_install([ + "libgl1-mesa-dev", + "libgl1-mesa-glx", + "libglew-dev", + "libosmesa6-dev", + "software-properties-common", + "patchelf", + "git", + "ffmpeg", +]).pip_install([ + "torch", + "datasets", + "transformers", + "free-mujoco-py", + "einops", + "gym", + "protobuf==3.20.1", + "git+https://github.com/rail-berkeley/d4rl.git", + "wandb", + "mediapy", + "Pillow==9.0.0", + "moviepy", + "imageio" + ]) config = dict( - n_samples=4, - horizon=32, + n_samples=64, + horizon=128, num_inference_steps=200, n_guide_steps=0, scale_grad_by_std=True, scale=0.001, eta=0.0, - t_grad_cutoff=4 + t_grad_cutoff=4, + device='cuda' ) -# model = torch.load("../diffuser/test.torch") -# hf_value_function = ValueFunction(training_horizon=32, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) -# hf_value_function.load_state_dict(model.state_dict()) -# hf_value_function.to_hub("bglick13/hf_value_function") - -env_name = "hopper-medium-expert-v2" -env = gym.make(env_name) -data = env.get_dataset() # dataset is only used for normalization in this colab -render = MuJoCoRenderer(env) - -# Cuda settings for colab -# torch.cuda.get_device_name(0) -DEVICE = 'cpu' -DTYPE = torch.float - -# diffusion model settings -state_dim = env.observation_space.shape[0] -action_dim = env.action_space.shape[0] - - - - -# Two generators for different parts of the diffusion loop to work in colab -# generator = torch.Generator(device='cuda') -generator_cpu = torch.Generator(device='cpu') - -scheduler = ValueFunctionScheduler(num_train_timesteps=config['num_inference_steps'],beta_schedule="squaredcos_cap_v2", clip_sample=False) - -# 3 different pretrained models are available for this task. -# The horizion represents the length of trajectories used in training. 
-# network = ValueFunction(training_horizon=horizon, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) - -network = ValueFunction.from_pretrained("bglick13/hopper-medium-expert-v2-value-function-hor32").to(device=DEVICE) -unet = UNet1DModel.from_pretrained("bglick13/hopper-medium-expert-v2-unet-hor32").to(device=DEVICE) -# network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) -# network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) - -## add a batch dimension and repeat for multiple samples -## [ observation_dim ] --> [ n_samples x observation_dim ] -obs = env.reset() -total_reward = 0 -done = False -T = 400 -rollout = [obs.copy()] -trajectories = [] -y_maxes = [] -try: - for t in tqdm.tqdm(range(T)): - obs_raw = obs - # 1. Call the policy - # normalize observations for forward passes - obs = helpers.normalize(obs, data, 'observations') - - obs = obs[None].repeat(config['n_samples'], axis=0) - conditions = { - 0: helpers.to_torch(obs, device=DEVICE) - } - - # 2. Call the diffusion model - # constants for inference - batch_size = len(conditions[0]) - shape = (batch_size, config['horizon'], state_dim+action_dim) - - # sample random initial noise vector - x1 = torch.randn(shape, device=DEVICE, generator=generator_cpu) - - # this model is conditioned from an initial state, so you will see this function - # multiple times to change the initial state of generated data to the state - # generated via env.reset() above or env.step() below - x = helpers.reset_x0(x1, conditions, action_dim) - - # convert a np observation to torch for model forward pass - x = helpers.to_torch(x) - x, y = helpers.run_diffusion(x, scheduler, generator_cpu, network, unet, conditions, action_dim, config) - sorted_idx = y.argsort(0, descending=True).squeeze() - y_maxes.append(y[sorted_idx[0]].detach().cpu().numpy()) - sorted_values = x[sorted_idx] - actions = sorted_values[:, :, :action_dim] - if t % 10 == 0: - trajectory = sorted_values[:, :, action_dim:][0].unsqueeze(0).detach().numpy() - trajectory = helpers.de_normalize(trajectory, data, 'observations') - trajectories.append(trajectory) - - actions = actions.detach().cpu().numpy() - denorm_actions = helpers.de_normalize(actions, data, key='actions') - # denorm_actions = denorm_actions[np.random.randint(config['n_samples']), 0] - denorm_actions = denorm_actions[0, 0] - - - ## execute action in environment - next_observation, reward, terminal, _ = env.step(denorm_actions) - - ## update return - total_reward += reward - wandb.log({"total_reward": total_reward, "reward": reward, "y_max": y_maxes[-1], "diff_from_expert_reward": reward - data['rewards'][t]}) - print(f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}") - # save observations for rendering - rollout.append(next_observation.copy()) - - obs = next_observation -except KeyboardInterrupt: - pass - -print(f"Total reward: {total_reward}") - -images = show_sample(render, np.expand_dims(np.stack(rollout),axis=0)) -wandb.log({"rollout": wandb.Video('videos/sample.mp4', fps=60, format='mp4')}) \ No newline at end of file +@stub.function( + image=image, + secret=modal.Secret.from_name("wandb-api-key"), + mounts=modal.create_package_mounts(["diffusers"]), + gpu=True +) +def run(): + wandb.login(key=os.environ["WANDB_API_KEY"]) + wandb.init(project="diffusers-value-guided-rl") + + env_name = "hopper-medium-expert-v2" + env = gym.make(env_name) + data = env.get_dataset() # dataset is only used for normalization in this colab 
+ render = MuJoCoRenderer(env) + + # Cuda settings for colab + # torch.cuda.get_device_name(0) + DEVICE = config['device'] + DTYPE = torch.float + + # diffusion model settings + state_dim = env.observation_space.shape[0] + action_dim = env.action_space.shape[0] + + # Two generators for different parts of the diffusion loop to work in colab + # generator = torch.Generator(device='cuda') + generator = torch.Generator(device=DEVICE) + + scheduler = ValueFunctionScheduler(num_train_timesteps=config['num_inference_steps'],beta_schedule="squaredcos_cap_v2", clip_sample=False) + + # 3 different pretrained models are available for this task. + # The horizion represents the length of trajectories used in training. + # network = ValueFunction(training_horizon=horizon, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) + + network = ValueFunction.from_pretrained("bglick13/hopper-medium-expert-v2-value-function-hor32").to(device=DEVICE).eval() + unet = UNet1DModel.from_pretrained("bglick13/hopper-medium-expert-v2-unet-hor128").to(device=DEVICE).eval() + # unet = UNet1DModel.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) + # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) + + ## add a batch dimension and repeat for multiple samples + ## [ observation_dim ] --> [ n_samples x observation_dim ] + obs = env.reset() + total_reward = 0 + done = False + T = 200 + rollout = [obs.copy()] + trajectories = [] + y_maxes = [0] + try: + for t in tqdm.tqdm(range(T)): + obs_raw = obs + # 1. Call the policy + # normalize observations for forward passes + obs = helpers.normalize(obs, data, 'observations') + + obs = obs[None].repeat(config['n_samples'], axis=0) + conditions = { + 0: helpers.to_torch(obs, device=DEVICE) + } + + # 2. 
Call the diffusion model + # constants for inference + batch_size = len(conditions[0]) + shape = (batch_size, config['horizon'], state_dim+action_dim) + + # sample random initial noise vector + x1 = torch.randn(shape, device=DEVICE, generator=generator) + + # this model is conditioned from an initial state, so you will see this function + # multiple times to change the initial state of generated data to the state + # generated via env.reset() above or env.step() below + x = helpers.reset_x0(x1, conditions, action_dim) + + # convert a np observation to torch for model forward pass + x = helpers.to_torch(x, device=DEVICE) + x, y = helpers.run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim, config) + if y: + sorted_idx = y.argsort(0, descending=True).squeeze() + y_maxes.append(y[sorted_idx[0]].detach().cpu().numpy()) + sorted_values = x[sorted_idx] + else: + sorted_values = x + actions = sorted_values[:, :, :action_dim] + if t % 10 == 0: + trajectory = sorted_values[:, :, action_dim:][0].unsqueeze(0).detach().cpu().numpy() + trajectory = helpers.de_normalize(trajectory, data, 'observations') + trajectories.append(trajectory) + + actions = actions.detach().cpu().numpy() + denorm_actions = helpers.de_normalize(actions, data, key='actions') + denorm_actions = denorm_actions[np.random.randint(config['n_samples']), 0] + # denorm_actions = denorm_actions[0, 0] + + + ## execute action in environment + next_observation, reward, terminal, _ = env.step(denorm_actions) + + ## update return + total_reward += reward + wandb.log({"total_reward": total_reward, "reward": reward, "y_max": y_maxes[-1], "diff_from_expert_reward": reward - data['rewards'][t]}) + print(f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}") + # save observations for rendering + rollout.append(next_observation.copy()) + + obs = next_observation + except KeyboardInterrupt: + pass + + print(f"Total reward: {total_reward}") + + images = show_sample(render, np.expand_dims(np.stack(rollout),axis=0)) + wandb.log({"rollout": wandb.Video("videos/sample.mp4", fps=60, format='mp4')}) + + +if __name__ == "__main__": + # run() + with stub.run(): + run() From 38616cf7a6e971ae8def8a5449d97bc096a98af6 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Wed, 12 Oct 2022 16:57:41 -0400 Subject: [PATCH 15/32] merge and code cleanup --- convert_model.py | 18 +- examples/diffuser/helpers.py | 2 +- .../diffuser/run_diffuser_value_guided.py | 2 +- src/diffusers/models/__init__.py | 1 + src/diffusers/models/unet_rl.py | 136 +------- .../schedulers/scheduling_value_function.py | 298 ------------------ 6 files changed, 22 insertions(+), 435 deletions(-) delete mode 100644 src/diffusers/schedulers/scheduling_value_function.py diff --git a/convert_model.py b/convert_model.py index 4323854faadd..cfb5db24570c 100644 --- a/convert_model.py +++ b/convert_model.py @@ -3,11 +3,13 @@ from diffusers import DDPMScheduler, UNet1DModel, ValueFunction, ValueFunctionScheduler import os import json -os.makedirs("hub/hopper-medium-v2/unet", exist_ok=True) +os.makedirs("hub/hopper-medium-v2/unet/hor32", exist_ok=True) +os.makedirs("hub/hopper-medium-v2/unet/hor128", exist_ok=True) + os.makedirs("hub/hopper-medium-v2/value_function", exist_ok=True) -def unet(): - model = torch.load("/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-hor128.torch") +def unet(hor): + model = torch.load(f"/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-hor{hor}.torch") state_dict = model.state_dict() hf_value_function = UNet1DModel(dim=32, 
dim_mults=(1, 4, 8), transition_dim=14) mapping = dict((k, hfk) for k, hfk in zip(model.state_dict().keys(), hf_value_function.state_dict().keys())) @@ -15,9 +17,13 @@ def unet(): state_dict[v] = state_dict.pop(k) hf_value_function.load_state_dict(state_dict) - torch.save(hf_value_function.state_dict(), "hub/hopper-medium-v2/unet/diffusion_pytorch_model.bin") - config = dict(dim=32, dim_mults=(1, 4, 8), transition_dim=14) - with open("hub/hopper-medium-v2/unet/config.json", "w") as f: + torch.save(hf_value_function.state_dict(), f"hub/hopper-medium-v2/unet/hor{hor}/diffusion_pytorch_model.bin") + if hor == 128: + dim_mults = (1, 4, 8) + elif hor == 32: + dim_mults = (1, 2, 4, 8) + config = dict(dim=32, dim_mults=dim_mults, transition_dim=14) + with open(f"hub/hopper-medium-v2/unet/hor{hor}/config.json", "w") as f: json.dump(config, f) def value_function(): diff --git a/examples/diffuser/helpers.py b/examples/diffuser/helpers.py index d8dc50e77388..6866c415f4e4 100644 --- a/examples/diffuser/helpers.py +++ b/examples/diffuser/helpers.py @@ -56,7 +56,7 @@ def run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim x = reset_x0(x, conditions, action_dim) with torch.no_grad(): prev_x = unet(x, timesteps).sample - x = scheduler.step(prev_x, i, x)["prev_sample"] + x = scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"] # 3. [optional] add posterior noise to the sample if config['eta'] > 0: diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 535b647a64c9..6c29acb62b7c 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -77,7 +77,7 @@ def run(): # generator = torch.Generator(device='cuda') generator = torch.Generator(device=DEVICE) - scheduler = ValueFunctionScheduler(num_train_timesteps=config['num_inference_steps'],beta_schedule="squaredcos_cap_v2", clip_sample=False) + scheduler = DDPMScheduler(num_train_timesteps=config['num_inference_steps'],beta_schedule="squaredcos_cap_v2", clip_sample=False, ) # 3 different pretrained models are available for this task. # The horizion represents the length of trajectories used in training. 
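The predict_epsilon=False flag passed to scheduler.step in helpers.py above changes how the scheduler recovers the denoised sample: with epsilon prediction the model output is treated as noise and x_0 is solved for, whereas here the output is taken as the prediction itself (the same convention the earlier scheduling_value_function.py change adopted with pred_original_sample = model_output). A short sketch of the standard DDPM algebra, using made-up tensors and an assumed alpha_prod_t value:

import torch

def predicted_x0(model_output, sample, alpha_prod_t, predict_epsilon):
    beta_prod_t = 1 - alpha_prod_t
    if predict_epsilon:
        # model predicts the added noise; invert q(x_t | x_0) to recover x_0
        return (sample - beta_prod_t ** 0.5 * model_output) / alpha_prod_t ** 0.5
    # model predicts the sample directly (predict_epsilon=False)
    return model_output

x_t = torch.randn(4, 32, 14)                      # (n_samples, horizon, transition_dim)
model_output = torch.randn_like(x_t)
alpha_prod_t = torch.tensor(0.9)                  # assumed cumulative alpha at step t
x0_from_eps = predicted_x0(model_output, x_t, alpha_prod_t, predict_epsilon=True)
x0_direct = predicted_x0(model_output, x_t, alpha_prod_t, predict_epsilon=False)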
diff --git a/src/diffusers/models/__init__.py b/src/diffusers/models/__init__.py index c5d53b2feb4b..a1a5722ef5f8 100644 --- a/src/diffusers/models/__init__.py +++ b/src/diffusers/models/__init__.py @@ -20,6 +20,7 @@ from .unet_2d import UNet2DModel from .unet_2d_condition import UNet2DConditionModel from .vae import AutoencoderKL, VQModel + from .unet_rl import UNetRLModel if is_flax_available(): from .unet_2d_condition_flax import FlaxUNet2DConditionModel diff --git a/src/diffusers/models/unet_rl.py b/src/diffusers/models/unet_rl.py index 129d18f330a8..fe33259cda40 100644 --- a/src/diffusers/models/unet_rl.py +++ b/src/diffusers/models/unet_rl.py @@ -5,8 +5,8 @@ import torch import torch.nn as nn -from diffusers.models.resnet import ResidualTemporalBlock -from diffusers.models.unet_blocks import DownResnetBlock1D, UpResnetBlock1D, Downsample1D +from diffusers.models.resnet import ResidualTemporalBlock1D +from diffusers.models.unet_1d_blocks import DownResnetBlock1D, UpResnetBlock1D, Downsample1D from ..configuration_utils import ConfigMixin, register_to_config from ..modeling_utils import ModelMixin @@ -16,7 +16,7 @@ @dataclass -class UNet1DOutput(BaseOutput): +class ValueFunctionOutput(BaseOutput): """ Args: sample (`torch.FloatTensor` of shape `(batch, horizon, obs_dimension)`): @@ -26,128 +26,6 @@ class UNet1DOutput(BaseOutput): sample: torch.FloatTensor -class UNet1DModel(ModelMixin, ConfigMixin): - """ - A UNet for multi-dimensional temporal data. This model takes the batch over the `training_horizon`. - - Parameters: - transition_dim: state-dimension of samples to predict over - dim: embedding dimension of model - dim_mults: dimension multiples of the up/down blocks - """ - - @register_to_config - def __init__( - self, - transition_dim=14, - dim=32, - dim_mults=(1, 4, 8), - ): - super().__init__() - - self.transition_dim = transition_dim - - # time - self.time_proj = Timesteps(num_channels=dim, flip_sin_to_cos=False, downscale_freq_shift=1) - self.time_mlp = TimestepEmbedding(channel=dim, time_embed_dim=4 * dim, act_fn="mish", out_dim=dim) - - dims = [transition_dim, *map(lambda m: dim * m, dim_mults)] - in_out = list(zip(dims[:-1], dims[1:])) - - self.down_blocks = nn.ModuleList([]) - self.up_blocks = nn.ModuleList([]) - num_resolutions = len(in_out) - - # down - for ind, (dim_in, dim_out) in enumerate(in_out): - is_last = ind >= (num_resolutions - 1) - - self.down_blocks.append( - DownResnetBlock1D( - in_channels=dim_in, out_channels=dim_out, temb_channels=dim, add_downsample=(not is_last) - ) - ) - - # mid - mid_dim = dims[-1] - self.mid_block1 = ResidualTemporalBlock(mid_dim, mid_dim, embed_dim=dim) - self.mid_block2 = ResidualTemporalBlock(mid_dim, mid_dim, embed_dim=dim) - - # up - for ind, (dim_in, dim_out) in enumerate(reversed(in_out[1:])): - is_last = ind >= (num_resolutions - 1) - - self.up_blocks.append( - UpResnetBlock1D( - in_channels=dim_out * 2, out_channels=dim_in, temb_channels=dim, add_upsample=(not is_last) - ) - ) - - # out - self.final_conv1d_1 = nn.Conv1d(dim, dim, 5, padding=2) - self.final_conv1d_gn = nn.GroupNorm(8, dim) - self.final_conv1d_act = nn.Mish() - self.final_conv1d_2 = nn.Conv1d(dim, transition_dim, 1) - - def forward( - self, - sample: torch.FloatTensor, - timestep: Union[torch.Tensor, float, int], - return_dict: bool = True, - ) -> Union[UNet1DOutput, Tuple]: - r""" - Args: - sample (`torch.FloatTensor`): (batch, horizon, obs_dimension + action_dimension) noisy inputs tensor - timestep (`torch.FloatTensor` or `float` or `int): batch (batch) 
timesteps - return_dict (`bool`, *optional*, defaults to `True`): - Whether or not to return a [`~models.unet_2d.UNet2DOutput`] instead of a plain tuple. - - Returns: - [`~models.unet_2d.UNet2DOutput`] or `tuple`: [`~models.unet_2d.UNet2DOutput`] if `return_dict` is True, - otherwise a `tuple`. When returning a tuple, the first element is the sample tensor. - """ - sample = sample.permute(0, 2, 1) - - # 1. time - timesteps = timestep - if not torch.is_tensor(timesteps): - timesteps = torch.tensor([timesteps], dtype=torch.long, device=sample.device) - elif torch.is_tensor(timesteps) and len(timesteps.shape) == 0: - timesteps = timesteps[None].to(sample.device) - - temb = self.time_proj(timesteps) - temb = self.time_mlp(temb) - down_block_res_samples = [] - - # 2. down - for downsample_block in self.down_blocks: - sample, res_samples = downsample_block(hidden_states=sample, temb=temb) - down_block_res_samples.append(res_samples[0]) - - # 3. mid - sample = self.mid_block1(sample, temb) - sample = self.mid_block2(sample, temb) - - # 4. up - for up_block in self.up_blocks: - sample = up_block(hidden_states=sample, res_hidden_states=down_block_res_samples.pop(), temb=temb) - - # 5. post-process - sample = self.final_conv1d_1(sample) - sample = rearrange_dims(sample) - sample = self.final_conv1d_gn(sample) - sample = rearrange_dims(sample) - sample = self.final_conv1d_act(sample) - sample = self.final_conv1d_2(sample) - - sample = sample.permute(0, 2, 1) - - if not return_dict: - return (sample,) - - return UNet1DOutput(sample=sample) - - class ValueFunction(ModelMixin, ConfigMixin): @register_to_config def __init__( @@ -182,10 +60,10 @@ def __init__( mid_dim_2 = mid_dim // 2 mid_dim_3 = mid_dim // 4 ## - self.mid_block1 = ResidualTemporalBlock(mid_dim, mid_dim_2, embed_dim=dim) + self.mid_block1 = ResidualTemporalBlock1D(mid_dim, mid_dim_2, embed_dim=dim) self.mid_down1 = Downsample1D(mid_dim_2, use_conv=True) ## - self.mid_block2 = ResidualTemporalBlock(mid_dim_2, mid_dim_3, embed_dim=dim) + self.mid_block2 = ResidualTemporalBlock1D(mid_dim_2, mid_dim_3, embed_dim=dim) self.mid_down2 = Downsample1D(mid_dim_3, use_conv=True) ## fc_dim = mid_dim_3 @@ -200,7 +78,7 @@ def forward( sample: torch.FloatTensor, timestep: Union[torch.Tensor, float, int], return_dict: bool = True, - ) -> Union[UNet1DOutput, Tuple]: + ) -> Union[ValueFunctionOutput, Tuple]: """r Args: sample (`torch.FloatTensor`): (batch, horizon, obs_dimension + action_dimension) noisy inputs tensor @@ -244,4 +122,4 @@ def forward( if not return_dict: return (sample,) - return UNet1DOutput(sample=sample) + return ValueFunctionOutput(sample=sample) diff --git a/src/diffusers/schedulers/scheduling_value_function.py b/src/diffusers/schedulers/scheduling_value_function.py deleted file mode 100644 index 246d65758522..000000000000 --- a/src/diffusers/schedulers/scheduling_value_function.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright 2022 UC Berkeley Team and The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim - -import math -import warnings -from dataclasses import dataclass -from typing import Optional, Tuple, Union - -import numpy as np -import torch - -from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import BaseOutput -from .scheduling_utils import SchedulerMixin - - -@dataclass -class ValueFunctionSchedulerOutput(BaseOutput): - """ - Output class for the scheduler's step function output. - - Args: - prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): - Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the - denoising loop. - pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): - The predicted denoised sample (x_{0}) based on the model output from the current timestep. - `pred_original_sample` can be used to preview progress or for guidance. - """ - - prev_sample: torch.FloatTensor - pred_original_sample: Optional[torch.FloatTensor] = None - - -def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): - """ - Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of - (1-beta) over time from t = [0,1]. - - Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up - to that part of the diffusion process. - - - Args: - num_diffusion_timesteps (`int`): the number of betas to produce. - max_beta (`float`): the maximum beta to use; use values lower than 1 to - prevent singularities. - - Returns: - betas (`np.ndarray`): the betas used by the scheduler to step the model outputs - """ - - def alpha_bar(time_step): - return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2 - - betas = [] - for i in range(num_diffusion_timesteps): - t1 = i / num_diffusion_timesteps - t2 = (i + 1) / num_diffusion_timesteps - betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) - return torch.tensor(betas, dtype=torch.float32) - - -class ValueFunctionScheduler(SchedulerMixin, ConfigMixin): - """ - Denoising diffusion probabilistic models (DDPMs) explores the connections between denoising score matching and - Langevin dynamics sampling. - - [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` - function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. - [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and - [`~ConfigMixin.from_config`] functions. - - For more details, see the original paper: https://arxiv.org/abs/2006.11239 - - Args: - num_train_timesteps (`int`): number of diffusion steps used to train the model. - beta_start (`float`): the starting `beta` value of inference. - beta_end (`float`): the final `beta` value. - beta_schedule (`str`): - the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from - `linear`, `scaled_linear`, or `squaredcos_cap_v2`. - trained_betas (`np.ndarray`, optional): - option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. - variance_type (`str`): - options to clip the variance used when adding noise to the denoised sample. 
Choose from `fixed_small`, - `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. - clip_sample (`bool`, default `True`): - option to clip predicted sample between -1 and 1 for numerical stability. - - """ - - @register_to_config - def __init__( - self, - num_train_timesteps: int = 1000, - beta_start: float = 0.0001, - beta_end: float = 0.02, - beta_schedule: str = "linear", - trained_betas: Optional[np.ndarray] = None, - variance_type: str = "fixed_small", - clip_sample: bool = True, - **kwargs, - ): - if "tensor_format" in kwargs: - warnings.warn( - "`tensor_format` is deprecated as an argument and will be removed in version `0.5.0`." - "If you're running your code in PyTorch, you can safely remove this argument.", - DeprecationWarning, - ) - - if trained_betas is not None: - self.betas = torch.from_numpy(trained_betas) - elif beta_schedule == "linear": - self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) - elif beta_schedule == "scaled_linear": - # this schedule is very specific to the latent diffusion model. - self.betas = ( - torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 - ) - elif beta_schedule == "squaredcos_cap_v2": - # Glide cosine schedule - self.betas = betas_for_alpha_bar(num_train_timesteps) - else: - raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") - - self.alphas = 1.0 - self.betas - self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) - self.one = torch.tensor(1.0) - - # setable values - self.num_inference_steps = None - self.timesteps = np.arange(0, num_train_timesteps)[::-1] - - self.variance_type = variance_type - - def set_timesteps(self, num_inference_steps: int): - """ - Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference. - - Args: - num_inference_steps (`int`): - the number of diffusion steps used when generating samples with a pre-trained model. 
- """ - num_inference_steps = min(self.config.num_train_timesteps, num_inference_steps) - self.num_inference_steps = num_inference_steps - self.timesteps = np.arange( - 0, self.config.num_train_timesteps, self.config.num_train_timesteps // self.num_inference_steps - )[::-1] - - def _get_variance(self, t, predicted_variance=None, variance_type=None): - alpha_prod_t = self.alphas_cumprod[t] - alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one - - # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://arxiv.org/pdf/2006.11239.pdf) - # and sample from it to get previous sample - # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample - variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * self.betas[t] - - if variance_type is None: - variance_type = self.config.variance_type - - # hacks - were probably added for training stability - if variance_type == "fixed_small": - variance = torch.clamp(variance, min=1e-20) - # for rl-diffuser https://arxiv.org/abs/2205.09991 - elif variance_type == "fixed_small_log": - variance = torch.log(torch.clamp(variance, min=1e-20)) - elif variance_type == "fixed_large": - variance = self.betas[t] - elif variance_type == "fixed_large_log": - # Glide max_log - variance = torch.log(self.betas[t]) - elif variance_type == "learned": - return predicted_variance - elif variance_type == "learned_range": - min_log = variance - max_log = self.betas[t] - frac = (predicted_variance + 1) / 2 - variance = frac * max_log + (1 - frac) * min_log - - return variance - - def step( - self, - model_output: torch.FloatTensor, - timestep: int, - sample: torch.FloatTensor, - generator=None, - scale=0.001, - return_dict: bool = True, - ) -> Union[ValueFunctionSchedulerOutput, Tuple]: - """ - Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion - process from the learned model outputs (most often the predicted noise). - - Args: - model_output (`torch.FloatTensor`): direct output from learned diffusion model. - timestep (`int`): current discrete timestep in the diffusion chain. - sample (`torch.FloatTensor`): - current instance of sample being created by diffusion process. - predict_epsilon (`bool`): - optional flag to use when model predicts the samples directly instead of the noise, epsilon. - generator: random number generator. - return_dict (`bool`): option for returning tuple rather than DDPMSchedulerOutput class - - Returns: - [`~schedulers.scheduling_utils.DDPMSchedulerOutput`] or `tuple`: - [`~schedulers.scheduling_utils.DDPMSchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When - returning a tuple, the first element is the sample tensor. - - """ - t = timestep - - if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]: - model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1) - else: - predicted_variance = None - - # 1. compute alphas, betas - alpha_prod_t = self.alphas_cumprod[t] - alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one - beta_prod_t = 1 - alpha_prod_t - beta_prod_t_prev = 1 - alpha_prod_t_prev - - # 2. compute predicted original sample from predicted noise also called - # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf - - pred_original_sample = model_output - - # 3. Clip "predicted x_0" - if self.config.clip_sample: - pred_original_sample = torch.clamp(pred_original_sample, -1, 1) - - # 4. 
Compute coefficients for pred_original_sample x_0 and current sample x_t - # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf - pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * self.betas[t]) / beta_prod_t - current_sample_coeff = self.alphas[t] ** (0.5) * beta_prod_t_prev / beta_prod_t - - # 5. Compute predicted previous sample µ_t - # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf - pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample - - # 6. Add noise - variance = 0 - if t > 0: - noise = torch.randn( - model_output.size(), dtype=model_output.dtype, layout=model_output.layout, generator=generator - ).to(model_output.device) - variance = (self._get_variance(t, predicted_variance=predicted_variance) ** 0.5) * noise - - pred_prev_sample = pred_prev_sample + variance * noise - - if not return_dict: - return (pred_prev_sample,) - - return ValueFunctionSchedulerOutput(prev_sample=pred_prev_sample, pred_original_sample=pred_original_sample) - - def add_noise( - self, - original_samples: torch.FloatTensor, - noise: torch.FloatTensor, - timesteps: torch.IntTensor, - ) -> torch.FloatTensor: - if self.alphas_cumprod.device != original_samples.device: - self.alphas_cumprod = self.alphas_cumprod.to(original_samples.device) - - if timesteps.device != original_samples.device: - timesteps = timesteps.to(original_samples.device) - - sqrt_alpha_prod = self.alphas_cumprod[timesteps] ** 0.5 - sqrt_alpha_prod = sqrt_alpha_prod.flatten() - while len(sqrt_alpha_prod.shape) < len(original_samples.shape): - sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) - - sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps]) ** 0.5 - sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() - while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): - sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) - - noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise - return noisy_samples - - def __len__(self): - return self.config.num_train_timesteps From d37b472a6cd4bb7f0fe7098e065618337d75b62f Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Wed, 12 Oct 2022 18:02:32 -0400 Subject: [PATCH 16/32] use same api for rl model --- convert_model.py | 31 +++++++++----- src/diffusers/__init__.py | 1 - src/diffusers/models/__init__.py | 2 +- src/diffusers/models/unet_rl.py | 64 +++++++++++++++------------- src/diffusers/schedulers/__init__.py | 1 - 5 files changed, 55 insertions(+), 44 deletions(-) diff --git a/convert_model.py b/convert_model.py index cfb5db24570c..216c46cf3003 100644 --- a/convert_model.py +++ b/convert_model.py @@ -1,6 +1,6 @@ import torch -from diffusers import DDPMScheduler, UNet1DModel, ValueFunction, ValueFunctionScheduler +from diffusers import DDPMScheduler, UNet1DModel, ValueFunction import os import json os.makedirs("hub/hopper-medium-v2/unet/hor32", exist_ok=True) @@ -9,27 +9,36 @@ os.makedirs("hub/hopper-medium-v2/value_function", exist_ok=True) def unet(hor): + if hor == 128: + down_block_types = ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D") + block_out_channels = (32, 128, 256) + up_block_types = ("UpResnetBlock1D", "UpResnetBlock1D") + + elif hor == 32: + down_block_types = ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D") + block_out_channels = (32, 64, 128, 256) + up_block_types = ("UpResnetBlock1D", "UpResnetBlock1D", "UpResnetBlock1D") model = 
torch.load(f"/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-hor{hor}.torch") state_dict = model.state_dict() - hf_value_function = UNet1DModel(dim=32, dim_mults=(1, 4, 8), transition_dim=14) + config = dict(down_block_types=down_block_types, block_out_channels=block_out_channels, up_block_types=up_block_types, layers_per_block=1) + hf_value_function = UNet1DModel(**config) + print(f"length of state dict: {len(state_dict.keys())}") + print(f"length of value function dict: {len(hf_value_function.state_dict().keys())}") mapping = dict((k, hfk) for k, hfk in zip(model.state_dict().keys(), hf_value_function.state_dict().keys())) for k, v in mapping.items(): state_dict[v] = state_dict.pop(k) hf_value_function.load_state_dict(state_dict) torch.save(hf_value_function.state_dict(), f"hub/hopper-medium-v2/unet/hor{hor}/diffusion_pytorch_model.bin") - if hor == 128: - dim_mults = (1, 4, 8) - elif hor == 32: - dim_mults = (1, 2, 4, 8) - config = dict(dim=32, dim_mults=dim_mults, transition_dim=14) with open(f"hub/hopper-medium-v2/unet/hor{hor}/config.json", "w") as f: json.dump(config, f) def value_function(): + config = dict(in_channels=14, down_block_types=("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"), block_out_channels=(32, 64, 128, 256), layers_per_block=1) + model = torch.load("/Users/bglickenhaus/Documents/diffuser/value_function-hopper-hor32.torch") state_dict = model.state_dict() - hf_value_function = ValueFunction(dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14) + hf_value_function = ValueFunction(**config) print(f"length of state dict: {len(state_dict.keys())}") print(f"length of value function dict: {len(hf_value_function.state_dict().keys())}") @@ -40,11 +49,11 @@ def value_function(): hf_value_function.load_state_dict(state_dict) torch.save(hf_value_function.state_dict(), "hub/hopper-medium-v2/value_function/diffusion_pytorch_model.bin") - config = dict(dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14) with open("hub/hopper-medium-v2/value_function/config.json", "w") as f: json.dump(config, f) if __name__ == "__main__": - unet() - value_function() \ No newline at end of file + # unet(32) + unet(128) + # value_function() \ No newline at end of file diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index c21ba3c7c3c2..645ba4604cea 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -37,7 +37,6 @@ PNDMScheduler, SchedulerMixin, ScoreSdeVeScheduler, - ValueFunctionScheduler ) from .training_utils import EMAModel else: diff --git a/src/diffusers/models/__init__.py b/src/diffusers/models/__init__.py index a1a5722ef5f8..355999f76688 100644 --- a/src/diffusers/models/__init__.py +++ b/src/diffusers/models/__init__.py @@ -20,7 +20,7 @@ from .unet_2d import UNet2DModel from .unet_2d_condition import UNet2DConditionModel from .vae import AutoencoderKL, VQModel - from .unet_rl import UNetRLModel + from .unet_rl import ValueFunction if is_flax_available(): from .unet_2d_condition_flax import FlaxUNet2DConditionModel diff --git a/src/diffusers/models/unet_rl.py b/src/diffusers/models/unet_rl.py index fe33259cda40..6584bd13eda3 100644 --- a/src/diffusers/models/unet_rl.py +++ b/src/diffusers/models/unet_rl.py @@ -5,8 +5,8 @@ import torch import torch.nn as nn -from diffusers.models.resnet import ResidualTemporalBlock1D -from diffusers.models.unet_1d_blocks import DownResnetBlock1D, UpResnetBlock1D, Downsample1D +from diffusers.models.resnet import ResidualTemporalBlock1D, Downsample1D +from 
diffusers.models.unet_1d_blocks import get_down_block from ..configuration_utils import ConfigMixin, register_to_config from ..modeling_utils import ModelMixin @@ -30,45 +30,49 @@ class ValueFunction(ModelMixin, ConfigMixin): @register_to_config def __init__( self, - transition_dim=14, - dim=32, - dim_mults=(1, 4, 8), + in_channels=14, + down_block_types: Tuple[str] = ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"), + block_out_channels: Tuple[int] = (32, 64, 128, 256), + act_fn: str = "mish", + norm_num_groups: int = 8, + layers_per_block: int = 1, ): super().__init__() + time_embed_dim = block_out_channels[0] * 4 + self.time_proj = Timesteps(num_channels=block_out_channels[0], flip_sin_to_cos=False, downscale_freq_shift=1) + self.time_mlp = TimestepEmbedding(channel=block_out_channels[0], time_embed_dim=time_embed_dim, act_fn="mish", out_dim=block_out_channels[0]) - self.transition_dim = transition_dim - self.time_proj = Timesteps(num_channels=dim, flip_sin_to_cos=False, downscale_freq_shift=1) - self.time_mlp = TimestepEmbedding(channel=dim, time_embed_dim=4 * dim, act_fn="mish", out_dim=dim) - - dims = [transition_dim, *map(lambda m: dim * m, dim_mults)] - in_out = list(zip(dims[:-1], dims[1:])) self.blocks = nn.ModuleList([]) - num_resolutions = len(in_out) - - for ind, (dim_in, dim_out) in enumerate(in_out): - is_last = ind >= (num_resolutions - 1) - - self.blocks.append( - DownResnetBlock1D( - in_channels=dim_in, out_channels=dim_out, temb_channels=dim, add_downsample=True - ) + mid_dim = block_out_channels[-1] + + output_channel = in_channels + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block_type = down_block_types[i] + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + temb_channels=block_out_channels[0], + add_downsample=True, ) + self.blocks.append(down_block) - - mid_dim = dims[-1] - mid_dim_2 = mid_dim // 2 - mid_dim_3 = mid_dim // 4 ## - self.mid_block1 = ResidualTemporalBlock1D(mid_dim, mid_dim_2, embed_dim=dim) - self.mid_down1 = Downsample1D(mid_dim_2, use_conv=True) + self.mid_block1 = ResidualTemporalBlock1D(mid_dim, mid_dim // 2, embed_dim=block_out_channels[0]) + self.mid_down1 = Downsample1D(mid_dim // 2, use_conv=True) ## - self.mid_block2 = ResidualTemporalBlock1D(mid_dim_2, mid_dim_3, embed_dim=dim) - self.mid_down2 = Downsample1D(mid_dim_3, use_conv=True) + self.mid_block2 = ResidualTemporalBlock1D(mid_dim //2, mid_dim // 4, embed_dim=block_out_channels[0]) + self.mid_down2 = Downsample1D(mid_dim // 4, use_conv=True) ## - fc_dim = mid_dim_3 + fc_dim = mid_dim // 4 self.final_block = nn.ModuleList([ - nn.Linear(fc_dim + dim, fc_dim // 2), + nn.Linear(fc_dim + block_out_channels[0], fc_dim // 2), nn.Mish(), nn.Linear(fc_dim // 2, 1),] ) diff --git a/src/diffusers/schedulers/__init__.py b/src/diffusers/schedulers/__init__.py index c4770de538cc..a906c39eb24c 100644 --- a/src/diffusers/schedulers/__init__.py +++ b/src/diffusers/schedulers/__init__.py @@ -24,7 +24,6 @@ from .scheduling_sde_ve import ScoreSdeVeScheduler from .scheduling_sde_vp import ScoreSdeVpScheduler from .scheduling_utils import SchedulerMixin - from .scheduling_value_function import ValueFunctionScheduler else: from ..utils.dummy_pt_objects import * # noqa F403 From aa19286a6fccc5c7fdd5d1332c113592bca40a76 Mon Sep 17 00:00:00 
2001 From: Ben Glickenhaus Date: Thu, 13 Oct 2022 12:53:44 -0400 Subject: [PATCH 17/32] fix variance type --- convert_model.py | 6 +- examples/diffuser/helpers.py | 9 +-- .../diffuser/run_diffuser_value_guided.py | 65 ++++++++++--------- src/diffusers/schedulers/scheduling_ddpm.py | 8 ++- 4 files changed, 50 insertions(+), 38 deletions(-) diff --git a/convert_model.py b/convert_model.py index 216c46cf3003..85f96f0a743b 100644 --- a/convert_model.py +++ b/convert_model.py @@ -54,6 +54,6 @@ def value_function(): if __name__ == "__main__": - # unet(32) - unet(128) - # value_function() \ No newline at end of file + unet(32) + # unet(128) + value_function() \ No newline at end of file diff --git a/examples/diffuser/helpers.py b/examples/diffuser/helpers.py index 6866c415f4e4..a088635d7d37 100644 --- a/examples/diffuser/helpers.py +++ b/examples/diffuser/helpers.py @@ -49,13 +49,14 @@ def run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim grad = torch.autograd.grad([y.sum()], [x])[0] if config['scale_grad_by_std']: posterior_variance = scheduler._get_variance(i) - grad = posterior_variance * 0.5 * grad + model_std = torch.exp(0.5 * posterior_variance) + grad = model_std * grad grad[timesteps < config['t_grad_cutoff']] = 0 x = x.detach() x = x + config['scale'] * grad x = reset_x0(x, conditions, action_dim) - with torch.no_grad(): - prev_x = unet(x, timesteps).sample + # with torch.no_grad(): + prev_x = unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1) x = scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"] # 3. [optional] add posterior noise to the sample @@ -68,7 +69,7 @@ def run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim # 4. apply conditions to the trajectory x = reset_x0(x, conditions, action_dim) - x = to_torch(x) + x = to_torch(x, device=config['device']) # y = network(x, timesteps).sample return x, y diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 6c29acb62b7c..7d5588149fa0 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -4,7 +4,7 @@ import tqdm import numpy as np import gym -from diffusers import DDPMScheduler, UNet1DModel, ValueFunction, ValueFunctionScheduler +from diffusers import DDPMScheduler, UNet1DModel, ValueFunction from helpers import MuJoCoRenderer, show_sample import helpers import wandb @@ -39,27 +39,20 @@ config = dict( n_samples=64, - horizon=128, - num_inference_steps=200, - n_guide_steps=0, + horizon=32, + num_inference_steps=20, + n_guide_steps=2, scale_grad_by_std=True, - scale=0.001, + scale=0.1, eta=0.0, - t_grad_cutoff=4, - device='cuda' + t_grad_cutoff=2, + device='cpu' ) -@stub.function( - image=image, - secret=modal.Secret.from_name("wandb-api-key"), - mounts=modal.create_package_mounts(["diffusers"]), - gpu=True -) -def run(): - wandb.login(key=os.environ["WANDB_API_KEY"]) +def _run(): wandb.init(project="diffusers-value-guided-rl") - - env_name = "hopper-medium-expert-v2" + wandb.config.update(config) + env_name = "hopper-medium-v2" env = gym.make(env_name) data = env.get_dataset() # dataset is only used for normalization in this colab render = MuJoCoRenderer(env) @@ -77,14 +70,14 @@ def run(): # generator = torch.Generator(device='cuda') generator = torch.Generator(device=DEVICE) - scheduler = DDPMScheduler(num_train_timesteps=config['num_inference_steps'],beta_schedule="squaredcos_cap_v2", clip_sample=False, ) + scheduler = 
DDPMScheduler(num_train_timesteps=config['num_inference_steps'],beta_schedule="squaredcos_cap_v2", clip_sample=False, variance_type="fixed_small_log") # 3 different pretrained models are available for this task. # The horizion represents the length of trajectories used in training. # network = ValueFunction(training_horizon=horizon, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) - network = ValueFunction.from_pretrained("bglick13/hopper-medium-expert-v2-value-function-hor32").to(device=DEVICE).eval() - unet = UNet1DModel.from_pretrained("bglick13/hopper-medium-expert-v2-unet-hor128").to(device=DEVICE).eval() + network = ValueFunction.from_pretrained("bglick13/hopper-medium-v2-value-function-hor32").to(device=DEVICE).eval() + unet = UNet1DModel.from_pretrained(f"bglick13/hopper-medium-v2-unet-hor32").to(device=DEVICE).eval() # unet = UNet1DModel.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) @@ -92,8 +85,9 @@ def run(): ## [ observation_dim ] --> [ n_samples x observation_dim ] obs = env.reset() total_reward = 0 + total_score = 0 done = False - T = 200 + T = 1000 rollout = [obs.copy()] trajectories = [] y_maxes = [0] @@ -125,7 +119,7 @@ def run(): # convert a np observation to torch for model forward pass x = helpers.to_torch(x, device=DEVICE) x, y = helpers.run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim, config) - if y: + if y is not None: sorted_idx = y.argsort(0, descending=True).squeeze() y_maxes.append(y[sorted_idx[0]].detach().cpu().numpy()) sorted_values = x[sorted_idx] @@ -139,17 +133,18 @@ def run(): actions = actions.detach().cpu().numpy() denorm_actions = helpers.de_normalize(actions, data, key='actions') - denorm_actions = denorm_actions[np.random.randint(config['n_samples']), 0] - # denorm_actions = denorm_actions[0, 0] + # denorm_actions = denorm_actions[np.random.randint(config['n_samples']), 0] + denorm_actions = denorm_actions[0, 0] ## execute action in environment next_observation, reward, terminal, _ = env.step(denorm_actions) - + score = env.get_normalized_score(total_reward) ## update return total_reward += reward - wandb.log({"total_reward": total_reward, "reward": reward, "y_max": y_maxes[-1], "diff_from_expert_reward": reward - data['rewards'][t]}) - print(f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}") + total_score += score + wandb.log({"total_reward": total_reward, "reward": reward, "score": score, "total_score": total_score, "y_max": y_maxes[-1], "diff_from_expert_reward": reward - data['rewards'][t]}) + print(f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score: {total_score}") # save observations for rendering rollout.append(next_observation.copy()) @@ -162,8 +157,18 @@ def run(): images = show_sample(render, np.expand_dims(np.stack(rollout),axis=0)) wandb.log({"rollout": wandb.Video("videos/sample.mp4", fps=60, format='mp4')}) +@stub.function( + image=image, + secret=modal.Secret.from_name("wandb-api-key"), + mounts=modal.create_package_mounts(["diffusers"]), + gpu=True +) +def run(): + wandb.login(key=os.environ["WANDB_API_KEY"]) + _run() + if __name__ == "__main__": - # run() - with stub.run(): - run() + _run() + # with stub.run(): + # run() diff --git a/src/diffusers/schedulers/scheduling_ddpm.py b/src/diffusers/schedulers/scheduling_ddpm.py index 04c92904a660..98f15f8dbb64 100644 --- a/src/diffusers/schedulers/scheduling_ddpm.py +++ 
b/src/diffusers/schedulers/scheduling_ddpm.py @@ -283,7 +283,13 @@ def step( noise = torch.randn( model_output.size(), dtype=model_output.dtype, layout=model_output.layout, generator=generator ).to(model_output.device) - variance = (self._get_variance(t, predicted_variance=predicted_variance) ** 0.5) * noise + if self.variance_type == "fixed_small_log": + variance = (self._get_variance(t, predicted_variance=predicted_variance)) + variance = torch.exp(0.5 * variance) + variance = variance * noise + + else: + variance = (self._get_variance(t, predicted_variance=predicted_variance) ** 0.5) * noise pred_prev_sample = pred_prev_sample + variance From 02293e2d9b686ee064c4c56511528f98202354bf Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Thu, 13 Oct 2022 18:52:22 -0400 Subject: [PATCH 18/32] wrong normalization function --- convert_model.py | 8 ++++---- examples/diffuser/helpers.py | 16 +++++++--------- examples/diffuser/run_diffuser_value_guided.py | 17 +++++++++++------ 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/convert_model.py b/convert_model.py index 85f96f0a743b..4691c69239d7 100644 --- a/convert_model.py +++ b/convert_model.py @@ -18,7 +18,7 @@ def unet(hor): down_block_types = ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D") block_out_channels = (32, 64, 128, 256) up_block_types = ("UpResnetBlock1D", "UpResnetBlock1D", "UpResnetBlock1D") - model = torch.load(f"/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-hor{hor}.torch") + model = torch.load(f"/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-mediumv2-hor{hor}.torch") state_dict = model.state_dict() config = dict(down_block_types=down_block_types, block_out_channels=block_out_channels, up_block_types=up_block_types, layers_per_block=1) hf_value_function = UNet1DModel(**config) @@ -36,13 +36,13 @@ def unet(hor): def value_function(): config = dict(in_channels=14, down_block_types=("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"), block_out_channels=(32, 64, 128, 256), layers_per_block=1) - model = torch.load("/Users/bglickenhaus/Documents/diffuser/value_function-hopper-hor32.torch") - state_dict = model.state_dict() + model = torch.load("/Users/bglickenhaus/Documents/diffuser/value_function-hopper-mediumv2-hor32.torch") + state_dict = model hf_value_function = ValueFunction(**config) print(f"length of state dict: {len(state_dict.keys())}") print(f"length of value function dict: {len(hf_value_function.state_dict().keys())}") - mapping = dict((k, hfk) for k, hfk in zip(model.state_dict().keys(), hf_value_function.state_dict().keys())) + mapping = dict((k, hfk) for k, hfk in zip(state_dict.keys(), hf_value_function.state_dict().keys())) for k, v in mapping.items(): state_dict[v] = state_dict.pop(k) diff --git a/examples/diffuser/helpers.py b/examples/diffuser/helpers.py index a088635d7d37..633a7eedfce8 100644 --- a/examples/diffuser/helpers.py +++ b/examples/diffuser/helpers.py @@ -9,16 +9,14 @@ DTYPE = torch.float def normalize(x_in, data, key): - upper = np.max(data[key], axis=0) - lower = np.min(data[key], axis=0) - x_out = 2*(x_in - lower)/(upper-lower) - 1 - return x_out + means = data[key].mean(axis=0) + stds = data[key].std(axis=0) + return (x_in - means) / stds def de_normalize(x_in, data, key): - upper = np.max(data[key], axis=0) - lower = np.min(data[key], axis=0) - x_out = lower + (upper - lower)*(1 + x_in) /2 - return x_out + means = data[key].mean(axis=0) + stds = data[key].std(axis=0) + return x_in * stds + 
means def to_torch(x_in, dtype=None, device='cuda'): dtype = dtype or DTYPE @@ -61,7 +59,7 @@ def run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim # 3. [optional] add posterior noise to the sample if config['eta'] > 0: - noise = torch.randn(x.shape, generator=generator).to(x.device) + noise = torch.randn(x.shape).to(x.device) posterior_variance = scheduler._get_variance(i) # * noise # no noise when t == 0 # NOTE: original implementation missing sqrt on posterior_variance diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 7d5588149fa0..529f4bbb66ac 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -10,6 +10,9 @@ import wandb import modal import os +from pytorch_lightning import seed_everything + +seed_everything(0) stub = modal.Stub("diffusers-value-guided") image = modal.Image.debian_slim().apt_install([ @@ -34,7 +37,8 @@ "mediapy", "Pillow==9.0.0", "moviepy", - "imageio" + "imageio", + "pytorch-lightning", ]) config = dict( @@ -46,7 +50,7 @@ scale=0.1, eta=0.0, t_grad_cutoff=2, - device='cpu' + device='cuda' ) def _run(): @@ -83,6 +87,7 @@ def _run(): ## add a batch dimension and repeat for multiple samples ## [ observation_dim ] --> [ n_samples x observation_dim ] + env.seed(0) obs = env.reset() total_reward = 0 total_score = 0 @@ -109,7 +114,7 @@ def _run(): shape = (batch_size, config['horizon'], state_dim+action_dim) # sample random initial noise vector - x1 = torch.randn(shape, device=DEVICE, generator=generator) + x1 = torch.randn(shape, device=DEVICE) # this model is conditioned from an initial state, so you will see this function # multiple times to change the initial state of generated data to the state @@ -169,6 +174,6 @@ def run(): if __name__ == "__main__": - _run() - # with stub.run(): - # run() + # _run() + with stub.run(): + run() From 56818e58c6423d4eee4a415dcf49995fb652932c Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Mon, 17 Oct 2022 14:48:23 -0400 Subject: [PATCH 19/32] add tests --- tests/test_models_unet.py | 85 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/tests/test_models_unet.py b/tests/test_models_unet.py index e1dbdfaa4611..55f373af8a9b 100644 --- a/tests/test_models_unet.py +++ b/tests/test_models_unet.py @@ -20,7 +20,7 @@ import torch -from diffusers import UNet1DModel, UNet2DConditionModel, UNet2DModel +from diffusers import UNet1DModel, UNet2DConditionModel, UNet2DModel, ValueFunction from diffusers.utils import floats_tensor, slow, torch_device from .test_modeling_common import ModelTesterMixin @@ -524,3 +524,86 @@ def test_output_pretrained(self): def test_forward_with_norm_groups(self): # Not implemented yet for this UNet pass + + +class UNetRLModelTests(ModelTesterMixin, unittest.TestCase): + model_class = ValueFunction + + @property + def dummy_input(self): + batch_size = 4 + num_features = 14 + seq_len = 16 + + noise = floats_tensor((batch_size, num_features, seq_len)).to(torch_device) + time_step = torch.tensor([10] * batch_size).to(torch_device) + + return {"sample": noise, "timestep": time_step} + + @property + def input_shape(self): + return (4, 14, 16) + + @property + def output_shape(self): + return (4, 14, 1) + + def test_ema_training(self): + pass + + def test_training(self): + pass + + def prepare_init_args_and_inputs_for_common(self): + init_dict = { + "block_out_channels": (32, 64, 128, 256), + "in_channels": 14, + 
"out_channels": 14, + } + inputs_dict = self.dummy_input + return init_dict, inputs_dict + + def test_from_pretrained_hub(self): + unet, loading_info = UNet1DModel.from_pretrained( + "bglick13/hopper-medium-v2-unet-hor32", output_loading_info=True + ) + value_function, vf_loading_info = ValueFunction.from_pretrained( + "bglick13/hopper-medium-v2-value-function-hor32", output_loading_info=True + ) + self.assertIsNotNone(unet) + self.assertEqual(len(loading_info["missing_keys"]), 0) + self.assertIsNotNone(value_function) + self.assertEqual(len(vf_loading_info["missing_keys"]), 0) + + unet.to(torch_device) + value_function.to(torch_device) + image = value_function(**self.dummy_input) + + assert image is not None, "Make sure output is not None" + + def test_output_pretrained(self): + value_function, vf_loading_info = ValueFunction.from_pretrained( + "bglick13/hopper-medium-v2-value-function-hor32", output_loading_info=True + ) + torch.manual_seed(0) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(0) + + num_features = value_function.in_channels + seq_len = 14 + noise = torch.randn((1, seq_len, num_features)).permute( + 0, 2, 1 + ) # match original, we can update values and remove + time_step = torch.full((num_features,), 0) + + with torch.no_grad(): + output = value_function(noise, time_step).sample + + # fmt: off + expected_output_slice = torch.tensor([207.0272] * seq_len) + # fmt: on + self.assertTrue(torch.allclose(output, expected_output_slice, rtol=1e-3)) + + def test_forward_with_norm_groups(self): + # Not implemented yet for this UNet + pass From d085725ba387adc03067445a8dc81d5fa63dff7a Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Mon, 17 Oct 2022 14:53:09 -0400 Subject: [PATCH 20/32] style --- examples/diffuser/helpers.py | 149 +++++++------- examples/diffuser/run_diffuser.py | 214 ++++++++++---------- src/diffusers/__init__.py | 2 +- src/diffusers/models/__init__.py | 2 +- src/diffusers/models/unet_rl.py | 33 +-- src/diffusers/schedulers/scheduling_ddpm.py | 2 +- 6 files changed, 213 insertions(+), 189 deletions(-) diff --git a/examples/diffuser/helpers.py b/examples/diffuser/helpers.py index 633a7eedfce8..c2ec457abad7 100644 --- a/examples/diffuser/helpers.py +++ b/examples/diffuser/helpers.py @@ -1,86 +1,94 @@ import os -import mediapy as media +import warnings + import numpy as np import torch + import gym -import warnings +import mediapy as media import tqdm DTYPE = torch.float + + def normalize(x_in, data, key): means = data[key].mean(axis=0) stds = data[key].std(axis=0) return (x_in - means) / stds + def de_normalize(x_in, data, key): means = data[key].mean(axis=0) stds = data[key].std(axis=0) return x_in * stds + means - -def to_torch(x_in, dtype=None, device='cuda'): - dtype = dtype or DTYPE - device = device - if type(x_in) is dict: - return {k: to_torch(v, dtype, device) for k, v in x_in.items()} - elif torch.is_tensor(x_in): - return x_in.to(device).type(dtype) - return torch.tensor(x_in, dtype=dtype, device=device) + + +def to_torch(x_in, dtype=None, device="cuda"): + dtype = dtype or DTYPE + device = device + if type(x_in) is dict: + return {k: to_torch(v, dtype, device) for k, v in x_in.items()} + elif torch.is_tensor(x_in): + return x_in.to(device).type(dtype) + return torch.tensor(x_in, dtype=dtype, device=device) def reset_x0(x_in, cond, act_dim): - for key, val in cond.items(): - x_in[:, key, act_dim:] = val.clone() - return x_in + for key, val in cond.items(): + x_in[:, key, act_dim:] = val.clone() + return x_in + def run_diffusion(x, scheduler, 
generator, network, unet, conditions, action_dim, config): y = None for i in tqdm.tqdm(scheduler.timesteps): - # create batch of timesteps to pass into model - timesteps = torch.full((config['n_samples'],), i, device=config['device'], dtype=torch.long) + timesteps = torch.full((config["n_samples"],), i, device=config["device"], dtype=torch.long) # 3. call the sample function - for _ in range(config['n_guide_steps']): + for _ in range(config["n_guide_steps"]): with torch.enable_grad(): x.requires_grad_() y = network(x, timesteps).sample grad = torch.autograd.grad([y.sum()], [x])[0] - if config['scale_grad_by_std']: + if config["scale_grad_by_std"]: posterior_variance = scheduler._get_variance(i) model_std = torch.exp(0.5 * posterior_variance) grad = model_std * grad - grad[timesteps < config['t_grad_cutoff']] = 0 + grad[timesteps < config["t_grad_cutoff"]] = 0 x = x.detach() - x = x + config['scale'] * grad + x = x + config["scale"] * grad x = reset_x0(x, conditions, action_dim) # with torch.no_grad(): prev_x = unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1) x = scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"] - + # 3. [optional] add posterior noise to the sample - if config['eta'] > 0: + if config["eta"] > 0: noise = torch.randn(x.shape).to(x.device) - posterior_variance = scheduler._get_variance(i) # * noise + posterior_variance = scheduler._get_variance(i) # * noise # no noise when t == 0 # NOTE: original implementation missing sqrt on posterior_variance - x = x + int(i>0) * (0.5 * posterior_variance) * config['eta'] * noise # MJ had as log var, exponentiated + x = x + int(i > 0) * (0.5 * posterior_variance) * config["eta"] * noise # MJ had as log var, exponentiated # 4. apply conditions to the trajectory x = reset_x0(x, conditions, action_dim) - x = to_torch(x, device=config['device']) + x = to_torch(x, device=config["device"]) # y = network(x, timesteps).sample return x, y + def to_np(x_in): - if torch.is_tensor(x_in): - x_in = x_in.detach().cpu().numpy() - return x_in + if torch.is_tensor(x_in): + x_in = x_in.detach().cpu().numpy() + return x_in -# from MJ's Diffuser code + +# from MJ's Diffuser code # https://github.com/jannerm/diffuser/blob/76ae49ae85ba1c833bf78438faffdc63b8b4d55d/diffuser/utils/colab.py#L79 def mkdir(savepath): """ - returns `True` iff `savepath` is created + returns `True` iff `savepath` is created """ if not os.path.exists(savepath): os.makedirs(savepath) @@ -89,10 +97,10 @@ def mkdir(savepath): return False -def show_sample(renderer, observations, filename='sample.mp4', savebase='videos'): - ''' +def show_sample(renderer, observations, filename="sample.mp4", savebase="videos"): + """ observations : [ batch_size x horizon x observation_dim ] - ''' + """ mkdir(savebase) savepath = os.path.join(savebase, filename) @@ -106,52 +114,58 @@ def show_sample(renderer, observations, filename='sample.mp4', savebase='videos' ## [ horizon x height x (batch_size * width) x channels ] images = np.concatenate(images, axis=2) media.write_video(savepath, images, fps=60) - media.show_video(images, codec='h264', fps=60) + media.show_video(images, codec="h264", fps=60) return images + # Code adapted from Michael Janner # source: https://github.com/jannerm/diffuser/blob/main/diffuser/utils/rendering.py import mujoco_py as mjc + def env_map(env_name): - ''' - map D4RL dataset names to custom fully-observed - variants for rendering - ''' - if 'halfcheetah' in env_name: - return 'HalfCheetahFullObs-v2' - elif 'hopper' in env_name: - return 'HopperFullObs-v2' 
- elif 'walker2d' in env_name: - return 'Walker2dFullObs-v2' + """ + map D4RL dataset names to custom fully-observed + variants for rendering + """ + if "halfcheetah" in env_name: + return "HalfCheetahFullObs-v2" + elif "hopper" in env_name: + return "HopperFullObs-v2" + elif "walker2d" in env_name: + return "Walker2dFullObs-v2" else: return env_name + def get_image_mask(img): background = (img == 255).all(axis=-1, keepdims=True) mask = ~background.repeat(3, axis=-1) return mask + def atmost_2d(x): while x.ndim > 2: x = x.squeeze(0) return x + def set_state(env, state): qpos_dim = env.sim.data.qpos.size qvel_dim = env.sim.data.qvel.size if not state.size == qpos_dim + qvel_dim: warnings.warn( - f'[ utils/rendering ] Expected state of size {qpos_dim + qvel_dim}, ' - f'but got state of size {state.size}') - state = state[:qpos_dim + qvel_dim] + f"[ utils/rendering ] Expected state of size {qpos_dim + qvel_dim}, but got state of size {state.size}" + ) + state = state[: qpos_dim + qvel_dim] env.set_state(state[:qpos_dim], state[qpos_dim:]) + class MuJoCoRenderer: - ''' - default mujoco renderer - ''' + """ + default mujoco renderer + """ def __init__(self, env): if type(env) is str: @@ -166,14 +180,16 @@ def __init__(self, env): try: self.viewer = mjc.MjRenderContextOffscreen(self.env.sim) except: - print('[ utils/rendering ] Warning: could not initialize offscreen renderer') + print("[ utils/rendering ] Warning: could not initialize offscreen renderer") self.viewer = None def pad_observation(self, observation): - state = np.concatenate([ - np.zeros(1), - observation, - ]) + state = np.concatenate( + [ + np.zeros(1), + observation, + ] + ) return state def pad_observations(self, observations): @@ -182,14 +198,16 @@ def pad_observations(self, observations): xvel_dim = qpos_dim - 1 xvel = observations[:, xvel_dim] xpos = np.cumsum(xvel) * self.env.dt - states = np.concatenate([ - xpos[:,None], - observations, - ], axis=-1) + states = np.concatenate( + [ + xpos[:, None], + observations, + ], + axis=-1, + ) return states def render(self, observation, dim=256, partial=False, qvel=True, render_kwargs=None, conditions=None): - if type(dim) == int: dim = (dim, dim) @@ -198,15 +216,10 @@ def render(self, observation, dim=256, partial=False, qvel=True, render_kwargs=N if render_kwargs is None: xpos = observation[0] if not partial else 0 - render_kwargs = { - 'trackbodyid': 2, - 'distance': 3, - 'lookat': [xpos, -0.5, 1], - 'elevation': -20 - } + render_kwargs = {"trackbodyid": 2, "distance": 3, "lookat": [xpos, -0.5, 1], "elevation": -20} for key, val in render_kwargs.items(): - if key == 'lookat': + if key == "lookat": self.viewer.cam.lookat[:] = val[:] else: setattr(self.viewer.cam, key, val) @@ -251,4 +264,4 @@ def renders(self, samples, partial=False, **kwargs): return composite def __call__(self, *args, **kwargs): - return self.renders(*args, **kwargs) \ No newline at end of file + return self.renders(*args, **kwargs) diff --git a/examples/diffuser/run_diffuser.py b/examples/diffuser/run_diffuser.py index 97f38fd3f261..e69ffc117e3e 100644 --- a/examples/diffuser/run_diffuser.py +++ b/examples/diffuser/run_diffuser.py @@ -1,66 +1,71 @@ -import d4rl - -import torch -import tqdm import numpy as np -import gym +import torch + +import d4rl +import gym import helpers +import tqdm + env_name = "hopper-medium-expert-v2" env = gym.make(env_name) -data = env.get_dataset() # dataset is only used for normalization in this colab +data = env.get_dataset() # dataset is only used for normalization in this colab # 
Cuda settings for colab # torch.cuda.get_device_name(0) -DEVICE = 'cpu' +DEVICE = "cpu" DTYPE = torch.float # diffusion model settings -n_samples = 4 # number of trajectories planned via diffusion -horizon = 128 # length of sampled trajectories -state_dim = env.observation_space.shape[0] +n_samples = 4 # number of trajectories planned via diffusion +horizon = 128 # length of sampled trajectories +state_dim = env.observation_space.shape[0] action_dim = env.action_space.shape[0] -num_inference_steps = 100 # number of difusion steps +num_inference_steps = 100 # number of difusion steps + def normalize(x_in, data, key): - upper = np.max(data[key], axis=0) - lower = np.min(data[key], axis=0) - x_out = 2*(x_in - lower)/(upper-lower) - 1 - return x_out + upper = np.max(data[key], axis=0) + lower = np.min(data[key], axis=0) + x_out = 2 * (x_in - lower) / (upper - lower) - 1 + return x_out + def de_normalize(x_in, data, key): - upper = np.max(data[key], axis=0) - lower = np.min(data[key], axis=0) - x_out = lower + (upper - lower)*(1 + x_in) /2 - return x_out - -def to_torch(x_in, dtype=None, device=None): - dtype = dtype or DTYPE - device = device or DEVICE - if type(x_in) is dict: - return {k: to_torch(v, dtype, device) for k, v in x_in.items()} - elif torch.is_tensor(x_in): - return x_in.to(device).type(dtype) - return torch.tensor(x_in, dtype=dtype, device=device) + upper = np.max(data[key], axis=0) + lower = np.min(data[key], axis=0) + x_out = lower + (upper - lower) * (1 + x_in) / 2 + return x_out +def to_torch(x_in, dtype=None, device=None): + dtype = dtype or DTYPE + device = device or DEVICE + if type(x_in) is dict: + return {k: to_torch(v, dtype, device) for k, v in x_in.items()} + elif torch.is_tensor(x_in): + return x_in.to(device).type(dtype) + return torch.tensor(x_in, dtype=dtype, device=device) + from diffusers import DDPMScheduler, TemporalUNet + # Two generators for different parts of the diffusion loop to work in colab -generator_cpu = torch.Generator(device='cpu') +generator_cpu = torch.Generator(device="cpu") -scheduler = DDPMScheduler(num_train_timesteps=100,beta_schedule="squaredcos_cap_v2") +scheduler = DDPMScheduler(num_train_timesteps=100, beta_schedule="squaredcos_cap_v2") -# 3 different pretrained models are available for this task. +# 3 different pretrained models are available for this task. # The horizion represents the length of trajectories used in training. 
network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) def reset_x0(x_in, cond, act_dim): - for key, val in cond.items(): - x_in[:, key, act_dim:] = val.clone() - return x_in + for key, val in cond.items(): + x_in[:, key, act_dim:] = val.clone() + return x_in + # network specific constants for inference clip_denoised = network.clip_denoised @@ -75,77 +80,76 @@ def reset_x0(x_in, cond, act_dim): rollout = [obs.copy()] try: - for t in tqdm.tqdm(range(T)): - obs_raw = obs - - # normalize observations for forward passes - obs = normalize(obs, data, 'observations') - obs = obs[None].repeat(n_samples, axis=0) - conditions = { - 0: to_torch(obs, device=DEVICE) - } - - # constants for inference - batch_size = len(conditions[0]) - shape = (batch_size, horizon, state_dim+action_dim) - - # sample random initial noise vector - x1 = torch.randn(shape, device=DEVICE, generator=generator_cpu) - - # this model is conditioned from an initial state, so you will see this function - # multiple times to change the initial state of generated data to the state - # generated via env.reset() above or env.step() below - x = reset_x0(x1, conditions, action_dim) - - # convert a np observation to torch for model forward pass - x = to_torch(x) - - eta = 1.0 # noise factor for sampling reconstructed state - - # run the diffusion process - # for i in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps): - for i in tqdm.tqdm(scheduler.timesteps): - - # create batch of timesteps to pass into model - timesteps = torch.full((batch_size,), i, device=DEVICE, dtype=torch.long) - - # 1. generate prediction from model - with torch.no_grad(): - residual = network(x, timesteps).sample - - # 2. use the model prediction to reconstruct an observation (de-noise) - obs_reconstruct = scheduler.step(residual, i, x, predict_epsilon=predict_epsilon)["prev_sample"] - - # 3. [optional] add posterior noise to the sample - if eta > 0: - noise = torch.randn(obs_reconstruct.shape, generator=generator_cpu).to(obs_reconstruct.device) - posterior_variance = scheduler._get_variance(i) # * noise - # no noise when t == 0 - # NOTE: original implementation missing sqrt on posterior_variance - obs_reconstruct = obs_reconstruct + int(i>0) * (0.5 * posterior_variance) * eta* noise # MJ had as log var, exponentiated - - # 4. 
apply conditions to the trajectory - obs_reconstruct_postcond = reset_x0(obs_reconstruct, conditions, action_dim) - x = to_torch(obs_reconstruct_postcond) - plans = helpers.to_np(x[:,:,:action_dim]) - # select random plan - idx = np.random.randint(plans.shape[0]) - # select action at correct time - action = plans[idx, 0, :] - actions= de_normalize(action, data, 'actions') - ## execute action in environment - next_observation, reward, terminal, _ = env.step(action) - - ## update return - total_reward += reward - print(f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}") - - # save observations for rendering - rollout.append(next_observation.copy()) - obs = next_observation + for t in tqdm.tqdm(range(T)): + obs_raw = obs + + # normalize observations for forward passes + obs = normalize(obs, data, "observations") + obs = obs[None].repeat(n_samples, axis=0) + conditions = {0: to_torch(obs, device=DEVICE)} + + # constants for inference + batch_size = len(conditions[0]) + shape = (batch_size, horizon, state_dim + action_dim) + + # sample random initial noise vector + x1 = torch.randn(shape, device=DEVICE, generator=generator_cpu) + + # this model is conditioned from an initial state, so you will see this function + # multiple times to change the initial state of generated data to the state + # generated via env.reset() above or env.step() below + x = reset_x0(x1, conditions, action_dim) + + # convert a np observation to torch for model forward pass + x = to_torch(x) + + eta = 1.0 # noise factor for sampling reconstructed state + + # run the diffusion process + # for i in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps): + for i in tqdm.tqdm(scheduler.timesteps): + # create batch of timesteps to pass into model + timesteps = torch.full((batch_size,), i, device=DEVICE, dtype=torch.long) + + # 1. generate prediction from model + with torch.no_grad(): + residual = network(x, timesteps).sample + + # 2. use the model prediction to reconstruct an observation (de-noise) + obs_reconstruct = scheduler.step(residual, i, x, predict_epsilon=predict_epsilon)["prev_sample"] + + # 3. [optional] add posterior noise to the sample + if eta > 0: + noise = torch.randn(obs_reconstruct.shape, generator=generator_cpu).to(obs_reconstruct.device) + posterior_variance = scheduler._get_variance(i) # * noise + # no noise when t == 0 + # NOTE: original implementation missing sqrt on posterior_variance + obs_reconstruct = ( + obs_reconstruct + int(i > 0) * (0.5 * posterior_variance) * eta * noise + ) # MJ had as log var, exponentiated + + # 4. 
apply conditions to the trajectory + obs_reconstruct_postcond = reset_x0(obs_reconstruct, conditions, action_dim) + x = to_torch(obs_reconstruct_postcond) + plans = helpers.to_np(x[:, :, :action_dim]) + # select random plan + idx = np.random.randint(plans.shape[0]) + # select action at correct time + action = plans[idx, 0, :] + actions = de_normalize(action, data, "actions") + ## execute action in environment + next_observation, reward, terminal, _ = env.step(action) + + ## update return + total_reward += reward + print(f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}") + + # save observations for rendering + rollout.append(next_observation.copy()) + obs = next_observation except KeyboardInterrupt: - pass + pass print(f"Total reward: {total_reward}") -render =helpers.MuJoCoRenderer(env) -helpers.show_sample(render, np.expand_dims(np.stack(rollout),axis=0)) \ No newline at end of file +render = helpers.MuJoCoRenderer(env) +helpers.show_sample(render, np.expand_dims(np.stack(rollout), axis=0)) diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 645ba4604cea..7088e560dd66 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -18,7 +18,7 @@ if is_torch_available(): from .modeling_utils import ModelMixin - from .models import AutoencoderKL, UNet1DModel, UNet2DConditionModel, UNet2DModel, VQModel, ValueFunction + from .models import AutoencoderKL, UNet1DModel, UNet2DConditionModel, UNet2DModel, ValueFunction, VQModel from .optimization import ( get_constant_schedule, get_constant_schedule_with_warmup, diff --git a/src/diffusers/models/__init__.py b/src/diffusers/models/__init__.py index 355999f76688..b771aaac8467 100644 --- a/src/diffusers/models/__init__.py +++ b/src/diffusers/models/__init__.py @@ -19,8 +19,8 @@ from .unet_1d import UNet1DModel from .unet_2d import UNet2DModel from .unet_2d_condition import UNet2DConditionModel - from .vae import AutoencoderKL, VQModel from .unet_rl import ValueFunction + from .vae import AutoencoderKL, VQModel if is_flax_available(): from .unet_2d_condition_flax import FlaxUNet2DConditionModel diff --git a/src/diffusers/models/unet_rl.py b/src/diffusers/models/unet_rl.py index 6584bd13eda3..b6e052c8922f 100644 --- a/src/diffusers/models/unet_rl.py +++ b/src/diffusers/models/unet_rl.py @@ -5,21 +5,20 @@ import torch import torch.nn as nn -from diffusers.models.resnet import ResidualTemporalBlock1D, Downsample1D +from diffusers.models.resnet import Downsample1D, ResidualTemporalBlock1D from diffusers.models.unet_1d_blocks import get_down_block from ..configuration_utils import ConfigMixin, register_to_config from ..modeling_utils import ModelMixin from ..utils import BaseOutput from .embeddings import TimestepEmbedding, Timesteps -from .resnet import rearrange_dims @dataclass class ValueFunctionOutput(BaseOutput): """ Args: - sample (`torch.FloatTensor` of shape `(batch, horizon, obs_dimension)`): + sample (`torch.FloatTensor` of shape `(batch, horizon, 1)`): Hidden states output. Output of last layer of model. 
""" @@ -31,7 +30,12 @@ class ValueFunction(ModelMixin, ConfigMixin): def __init__( self, in_channels=14, - down_block_types: Tuple[str] = ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"), + down_block_types: Tuple[str] = ( + "DownResnetBlock1D", + "DownResnetBlock1D", + "DownResnetBlock1D", + "DownResnetBlock1D", + ), block_out_channels: Tuple[int] = (32, 64, 128, 256), act_fn: str = "mish", norm_num_groups: int = 8, @@ -40,8 +44,9 @@ def __init__( super().__init__() time_embed_dim = block_out_channels[0] * 4 self.time_proj = Timesteps(num_channels=block_out_channels[0], flip_sin_to_cos=False, downscale_freq_shift=1) - self.time_mlp = TimestepEmbedding(channel=block_out_channels[0], time_embed_dim=time_embed_dim, act_fn="mish", out_dim=block_out_channels[0]) - + self.time_mlp = TimestepEmbedding( + channel=block_out_channels[0], time_embed_dim=time_embed_dim, act_fn="mish", out_dim=block_out_channels[0] + ) self.blocks = nn.ModuleList([]) mid_dim = block_out_channels[-1] @@ -67,14 +72,16 @@ def __init__( self.mid_block1 = ResidualTemporalBlock1D(mid_dim, mid_dim // 2, embed_dim=block_out_channels[0]) self.mid_down1 = Downsample1D(mid_dim // 2, use_conv=True) ## - self.mid_block2 = ResidualTemporalBlock1D(mid_dim //2, mid_dim // 4, embed_dim=block_out_channels[0]) + self.mid_block2 = ResidualTemporalBlock1D(mid_dim // 2, mid_dim // 4, embed_dim=block_out_channels[0]) self.mid_down2 = Downsample1D(mid_dim // 4, use_conv=True) ## fc_dim = mid_dim // 4 - self.final_block = nn.ModuleList([ - nn.Linear(fc_dim + block_out_channels[0], fc_dim // 2), - nn.Mish(), - nn.Linear(fc_dim // 2, 1),] + self.final_block = nn.ModuleList( + [ + nn.Linear(fc_dim + block_out_channels[0], fc_dim // 2), + nn.Mish(), + nn.Linear(fc_dim // 2, 1), + ] ) def forward( @@ -88,10 +95,10 @@ def forward( sample (`torch.FloatTensor`): (batch, horizon, obs_dimension + action_dimension) noisy inputs tensor timestep (`torch.FloatTensor` or `float` or `int): batch (batch) timesteps return_dict (`bool`, *optional*, defaults to `True`): - Whether or not to return a [`~models.unet_2d.UNet2DOutput`] instead of a plain tuple. + Whether or not to return a [`~models.unet_rl.ValueFunctionOutput`] instead of a plain tuple. Returns: - [`~models.unet_2d.UNet2DOutput`] or `tuple`: [`~models.unet_2d.UNet2DOutput`] if `return_dict` is True, + [`~models.unet_rl.ValueFunctionOutput`] or `tuple`: [`~models.unet_rl.ValueFunctionOutput`] if `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor. 
""" sample = sample.permute(0, 2, 1) diff --git a/src/diffusers/schedulers/scheduling_ddpm.py b/src/diffusers/schedulers/scheduling_ddpm.py index 98f15f8dbb64..9072710886b8 100644 --- a/src/diffusers/schedulers/scheduling_ddpm.py +++ b/src/diffusers/schedulers/scheduling_ddpm.py @@ -284,7 +284,7 @@ def step( model_output.size(), dtype=model_output.dtype, layout=model_output.layout, generator=generator ).to(model_output.device) if self.variance_type == "fixed_small_log": - variance = (self._get_variance(t, predicted_variance=predicted_variance)) + variance = self._get_variance(t, predicted_variance=predicted_variance) variance = torch.exp(0.5 * variance) variance = variance * noise From 93fe3ef0ab23a25c66c77b6f5e988b360e54bb73 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Mon, 17 Oct 2022 15:03:11 -0400 Subject: [PATCH 21/32] style and quality --- examples/diffuser/helpers.py | 12 +- examples/diffuser/run_diffuser.py | 18 +-- .../diffuser/run_diffuser_value_guided.py | 115 ++++++------------ examples/diffuser/train_diffuser.py | 97 ++++++++------- src/diffusers/models/unet_rl.py | 5 +- 5 files changed, 106 insertions(+), 141 deletions(-) diff --git a/examples/diffuser/helpers.py b/examples/diffuser/helpers.py index c2ec457abad7..3d873e4112dc 100644 --- a/examples/diffuser/helpers.py +++ b/examples/diffuser/helpers.py @@ -6,6 +6,7 @@ import gym import mediapy as media +import mujoco_py as mjc import tqdm @@ -107,11 +108,11 @@ def show_sample(renderer, observations, filename="sample.mp4", savebase="videos" images = [] for rollout in observations: - ## [ horizon x height x width x channels ] + # [ horizon x height x width x channels ] img = renderer._renders(rollout, partial=True) images.append(img) - ## [ horizon x height x (batch_size * width) x channels ] + # [ horizon x height x (batch_size * width) x channels ] images = np.concatenate(images, axis=2) media.write_video(savepath, images, fps=60) media.show_video(images, codec="h264", fps=60) @@ -120,7 +121,6 @@ def show_sample(renderer, observations, filename="sample.mp4", savebase="videos" # Code adapted from Michael Janner # source: https://github.com/jannerm/diffuser/blob/main/diffuser/utils/rendering.py -import mujoco_py as mjc def env_map(env_name): @@ -173,8 +173,8 @@ def __init__(self, env): self.env = gym.make(env) else: self.env = env - ## - 1 because the envs in renderer are fully-observed - ## @TODO : clean up + # - 1 because the envs in renderer are fully-observed + # @TODO : clean up self.observation_dim = np.prod(self.env.observation_space.shape) - 1 self.action_dim = np.prod(self.env.action_space.shape) try: @@ -194,7 +194,7 @@ def pad_observation(self, observation): def pad_observations(self, observations): qpos_dim = self.env.sim.data.qpos.size - ## xpos is hidden + # xpos is hidden xvel_dim = qpos_dim - 1 xvel = observations[:, xvel_dim] xpos = np.cumsum(xvel) * self.env.dt diff --git a/examples/diffuser/run_diffuser.py b/examples/diffuser/run_diffuser.py index e69ffc117e3e..80eb8f20dadd 100644 --- a/examples/diffuser/run_diffuser.py +++ b/examples/diffuser/run_diffuser.py @@ -1,10 +1,11 @@ import numpy as np import torch -import d4rl +import d4rl # noqa import gym import helpers import tqdm +from diffusers import DDPMScheduler, UNet1DModel env_name = "hopper-medium-expert-v2" @@ -48,9 +49,6 @@ def to_torch(x_in, dtype=None, device=None): return torch.tensor(x_in, dtype=dtype, device=device) -from diffusers import DDPMScheduler, TemporalUNet - - # Two generators for different parts of the diffusion loop to work in 
colab generator_cpu = torch.Generator(device="cpu") @@ -58,9 +56,11 @@ def to_torch(x_in, dtype=None, device=None): # 3 different pretrained models are available for this task. # The horizion represents the length of trajectories used in training. -network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) +network = UNet1DModel.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) + + def reset_x0(x_in, cond, act_dim): for key, val in cond.items(): x_in[:, key, act_dim:] = val.clone() @@ -71,8 +71,8 @@ def reset_x0(x_in, cond, act_dim): clip_denoised = network.clip_denoised predict_epsilon = network.predict_epsilon -## add a batch dimension and repeat for multiple samples -## [ observation_dim ] --> [ n_samples x observation_dim ] +# add a batch dimension and repeat for multiple samples +# [ observation_dim ] --> [ n_samples x observation_dim ] obs = env.reset() total_reward = 0 done = False @@ -137,10 +137,10 @@ def reset_x0(x_in, cond, act_dim): # select action at correct time action = plans[idx, 0, :] actions = de_normalize(action, data, "actions") - ## execute action in environment + # execute action in environment next_observation, reward, terminal, _ = env.step(action) - ## update return + # update return total_reward += reward print(f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}") diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 529f4bbb66ac..31dbc8536f69 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -1,45 +1,13 @@ -import d4rl - +import numpy as np import torch + +import d4rl # noqa +import gym +import helpers import tqdm -import numpy as np -import gym from diffusers import DDPMScheduler, UNet1DModel, ValueFunction from helpers import MuJoCoRenderer, show_sample -import helpers -import wandb -import modal -import os -from pytorch_lightning import seed_everything - -seed_everything(0) - -stub = modal.Stub("diffusers-value-guided") -image = modal.Image.debian_slim().apt_install([ - "libgl1-mesa-dev", - "libgl1-mesa-glx", - "libglew-dev", - "libosmesa6-dev", - "software-properties-common", - "patchelf", - "git", - "ffmpeg", -]).pip_install([ - "torch", - "datasets", - "transformers", - "free-mujoco-py", - "einops", - "gym", - "protobuf==3.20.1", - "git+https://github.com/rail-berkeley/d4rl.git", - "wandb", - "mediapy", - "Pillow==9.0.0", - "moviepy", - "imageio", - "pytorch-lightning", - ]) + config = dict( n_samples=64, @@ -50,74 +18,73 @@ scale=0.1, eta=0.0, t_grad_cutoff=2, - device='cuda' + device="cpu", ) + def _run(): - wandb.init(project="diffusers-value-guided-rl") - wandb.config.update(config) env_name = "hopper-medium-v2" env = gym.make(env_name) - data = env.get_dataset() # dataset is only used for normalization in this colab + data = env.get_dataset() # dataset is only used for normalization in this colab render = MuJoCoRenderer(env) # Cuda settings for colab # torch.cuda.get_device_name(0) - DEVICE = config['device'] - DTYPE = torch.float + DEVICE = config["device"] # diffusion model settings - state_dim = env.observation_space.shape[0] + state_dim = env.observation_space.shape[0] action_dim = env.action_space.shape[0] # Two generators for different parts of the diffusion loop to work 
in colab # generator = torch.Generator(device='cuda') generator = torch.Generator(device=DEVICE) - scheduler = DDPMScheduler(num_train_timesteps=config['num_inference_steps'],beta_schedule="squaredcos_cap_v2", clip_sample=False, variance_type="fixed_small_log") + scheduler = DDPMScheduler( + num_train_timesteps=config["num_inference_steps"], + beta_schedule="squaredcos_cap_v2", + clip_sample=False, + variance_type="fixed_small_log", + ) - # 3 different pretrained models are available for this task. + # 3 different pretrained models are available for this task. # The horizion represents the length of trajectories used in training. # network = ValueFunction(training_horizon=horizon, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) network = ValueFunction.from_pretrained("bglick13/hopper-medium-v2-value-function-hor32").to(device=DEVICE).eval() - unet = UNet1DModel.from_pretrained(f"bglick13/hopper-medium-v2-unet-hor32").to(device=DEVICE).eval() + unet = UNet1DModel.from_pretrained("bglick13/hopper-medium-v2-unet-hor32").to(device=DEVICE).eval() # unet = UNet1DModel.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) - ## add a batch dimension and repeat for multiple samples - ## [ observation_dim ] --> [ n_samples x observation_dim ] + # add a batch dimension and repeat for multiple samples + # [ observation_dim ] --> [ n_samples x observation_dim ] env.seed(0) obs = env.reset() total_reward = 0 total_score = 0 - done = False T = 1000 rollout = [obs.copy()] trajectories = [] y_maxes = [0] try: for t in tqdm.tqdm(range(T)): - obs_raw = obs # 1. Call the policy # normalize observations for forward passes - obs = helpers.normalize(obs, data, 'observations') + obs = helpers.normalize(obs, data, "observations") - obs = obs[None].repeat(config['n_samples'], axis=0) - conditions = { - 0: helpers.to_torch(obs, device=DEVICE) - } + obs = obs[None].repeat(config["n_samples"], axis=0) + conditions = {0: helpers.to_torch(obs, device=DEVICE)} # 2. 
Call the diffusion model # constants for inference batch_size = len(conditions[0]) - shape = (batch_size, config['horizon'], state_dim+action_dim) + shape = (batch_size, config["horizon"], state_dim + action_dim) # sample random initial noise vector x1 = torch.randn(shape, device=DEVICE) # this model is conditioned from an initial state, so you will see this function - # multiple times to change the initial state of generated data to the state + # multiple times to change the initial state of generated data to the state # generated via env.reset() above or env.step() below x = helpers.reset_x0(x1, conditions, action_dim) @@ -133,23 +100,24 @@ def _run(): actions = sorted_values[:, :, :action_dim] if t % 10 == 0: trajectory = sorted_values[:, :, action_dim:][0].unsqueeze(0).detach().cpu().numpy() - trajectory = helpers.de_normalize(trajectory, data, 'observations') + trajectory = helpers.de_normalize(trajectory, data, "observations") trajectories.append(trajectory) actions = actions.detach().cpu().numpy() - denorm_actions = helpers.de_normalize(actions, data, key='actions') + denorm_actions = helpers.de_normalize(actions, data, key="actions") # denorm_actions = denorm_actions[np.random.randint(config['n_samples']), 0] denorm_actions = denorm_actions[0, 0] - - ## execute action in environment + # execute action in environment next_observation, reward, terminal, _ = env.step(denorm_actions) score = env.get_normalized_score(total_reward) - ## update return + # update return total_reward += reward total_score += score - wandb.log({"total_reward": total_reward, "reward": reward, "score": score, "total_score": total_score, "y_max": y_maxes[-1], "diff_from_expert_reward": reward - data['rewards'][t]}) - print(f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score: {total_score}") + print( + f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score:" + f" {total_score}" + ) # save observations for rendering rollout.append(next_observation.copy()) @@ -159,21 +127,12 @@ def _run(): print(f"Total reward: {total_reward}") - images = show_sample(render, np.expand_dims(np.stack(rollout),axis=0)) - wandb.log({"rollout": wandb.Video("videos/sample.mp4", fps=60, format='mp4')}) + show_sample(render, np.expand_dims(np.stack(rollout), axis=0)) + -@stub.function( - image=image, - secret=modal.Secret.from_name("wandb-api-key"), - mounts=modal.create_package_mounts(["diffusers"]), - gpu=True -) def run(): - wandb.login(key=os.environ["WANDB_API_KEY"]) _run() if __name__ == "__main__": - # _run() - with stub.run(): - run() + run() diff --git a/examples/diffuser/train_diffuser.py b/examples/diffuser/train_diffuser.py index 902f5ec7357c..dd226fe3c813 100644 --- a/examples/diffuser/train_diffuser.py +++ b/examples/diffuser/train_diffuser.py @@ -1,75 +1,82 @@ -import d4rl - -import torch -import tqdm import numpy as np -import gym -from accelerate import Accelerator +import torch + +import d4rl # noqa +import gym +from diffusers import DDPMScheduler, UNet1DModel + + env_name = "hopper-medium-expert-v2" env = gym.make(env_name) -data = env.get_dataset() # dataset is only used for normalization in this colab +data = env.get_dataset() # dataset is only used for normalization in this colab # Cuda settings for colab # torch.cuda.get_device_name(0) -DEVICE = 'cpu' +DEVICE = "cpu" DTYPE = torch.float # diffusion model settings -n_samples = 4 # number of trajectories planned via diffusion -horizon = 128 # length of sampled trajectories -state_dim = 
env.observation_space.shape[0] +n_samples = 4 # number of trajectories planned via diffusion +horizon = 128 # length of sampled trajectories +state_dim = env.observation_space.shape[0] action_dim = env.action_space.shape[0] -num_inference_steps = 100 # number of difusion steps +num_inference_steps = 100 # number of difusion steps + def normalize(x_in, data, key): - upper = np.max(data[key], axis=0) - lower = np.min(data[key], axis=0) - x_out = 2*(x_in - lower)/(upper-lower) - 1 - return x_out + upper = np.max(data[key], axis=0) + lower = np.min(data[key], axis=0) + x_out = 2 * (x_in - lower) / (upper - lower) - 1 + return x_out + def de_normalize(x_in, data, key): - upper = np.max(data[key], axis=0) - lower = np.min(data[key], axis=0) - x_out = lower + (upper - lower)*(1 + x_in) /2 - return x_out - + upper = np.max(data[key], axis=0) + lower = np.min(data[key], axis=0) + x_out = lower + (upper - lower) * (1 + x_in) / 2 + return x_out + + def to_torch(x_in, dtype=None, device=None): - dtype = dtype or DTYPE - device = device or DEVICE - if type(x_in) is dict: - return {k: to_torch(v, dtype, device) for k, v in x_in.items()} - elif torch.is_tensor(x_in): - return x_in.to(device).type(dtype) - return torch.tensor(x_in, dtype=dtype, device=device) + dtype = dtype or DTYPE + device = device or DEVICE + if type(x_in) is dict: + return {k: to_torch(v, dtype, device) for k, v in x_in.items()} + elif torch.is_tensor(x_in): + return x_in.to(device).type(dtype) + return torch.tensor(x_in, dtype=dtype, device=device) + obs = env.reset() obs_raw = obs # normalize observations for forward passes -obs = normalize(obs, data, 'observations') +obs = normalize(obs, data, "observations") -from diffusers import DDPMScheduler, TemporalUNet # Two generators for different parts of the diffusion loop to work in colab -generator = torch.Generator(device='cuda') -generator_cpu = torch.Generator(device='cpu') -network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) +generator = torch.Generator(device="cuda") +generator_cpu = torch.Generator(device="cpu") +network = UNet1DModel.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) -scheduler = DDPMScheduler(num_train_timesteps=100,beta_schedule="squaredcos_cap_v2") +scheduler = DDPMScheduler(num_train_timesteps=100, beta_schedule="squaredcos_cap_v2") optimizer = torch.optim.AdamW( - network.parameters(), - lr=0.001, - betas=(0.95, 0.99), - weight_decay=1e-6, - eps=1e-8, - ) -# 3 different pretrained models are available for this task. + network.parameters(), + lr=0.001, + betas=(0.95, 0.99), + weight_decay=1e-6, + eps=1e-8, +) +# 3 different pretrained models are available for this task. # The horizion represents the length of trajectories used in training. 
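# A minimal single training step, sketched on the assumption that the epsilon-prediction
# objective is used; `sample_trajectory_batch` is a hypothetical helper that would return a
# normalized (batch, horizon, state_dim + action_dim) array of trajectory windows cut from
# `data`. The TODO at the end of this file covers growing this into a full accelerate loop.
trajectories = to_torch(sample_trajectory_batch(data, horizon), device=DEVICE)
noise = torch.randn_like(trajectories)
timesteps = torch.randint(0, scheduler.config.num_train_timesteps, (trajectories.shape[0],), device=DEVICE).long()
noisy_trajectories = scheduler.add_noise(trajectories, noise, timesteps)
loss = torch.nn.functional.mse_loss(network(noisy_trajectories, timesteps).sample, noise)
loss.backward()
optimizer.step()
optimizer.zero_grad()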
# network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) + + def reset_x0(x_in, cond, act_dim): - for key, val in cond.items(): - x_in[:, key, act_dim:] = val.clone() - return x_in + for key, val in cond.items(): + x_in[:, key, act_dim:] = val.clone() + return x_in + -# TODO: Flesh this out using accelerate library (a la other examples) \ No newline at end of file +# TODO: Flesh this out using accelerate library (a la other examples) diff --git a/src/diffusers/models/unet_rl.py b/src/diffusers/models/unet_rl.py index b6e052c8922f..66822f99b198 100644 --- a/src/diffusers/models/unet_rl.py +++ b/src/diffusers/models/unet_rl.py @@ -55,7 +55,6 @@ def __init__( for i, down_block_type in enumerate(down_block_types): input_channel = output_channel output_channel = block_out_channels[i] - is_final_block = i == len(block_out_channels) - 1 down_block_type = down_block_types[i] down_block = get_down_block( @@ -98,8 +97,8 @@ def forward( Whether or not to return a [`~models.unet_rl.ValueFunctionOutput`] instead of a plain tuple. Returns: - [`~models.unet_rl.ValueFunctionOutput`] or `tuple`: [`~models.unet_rl.ValueFunctionOutput`] if `return_dict` is True, - otherwise a `tuple`. When returning a tuple, the first element is the sample tensor. + [`~models.unet_rl.ValueFunctionOutput`] or `tuple`: [`~models.unet_rl.ValueFunctionOutput`] if + `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor. """ sample = sample.permute(0, 2, 1) From 4e378e9518e9a2fb81507bf83e0d7918b35aef34 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Tue, 18 Oct 2022 19:18:03 -0400 Subject: [PATCH 22/32] edits based on comments --- .gitignore | 4 +- examples/diffuser/helpers.py | 267 ---------------- examples/diffuser/run_diffuser.py | 52 +-- .../diffuser/run_diffuser_value_guided.py | 21 +- examples/diffuser/train_diffuser.py | 298 ++++++++++++++++-- .../convert_models_diffuser_to_diffusers.py | 0 src/diffusers/schedulers/scheduling_ddpm.py | 6 +- 7 files changed, 289 insertions(+), 359 deletions(-) delete mode 100644 examples/diffuser/helpers.py rename convert_model.py => scripts/convert_models_diffuser_to_diffusers.py (100%) diff --git a/.gitignore b/.gitignore index f066e7f84299..f018a111ea33 100644 --- a/.gitignore +++ b/.gitignore @@ -164,5 +164,5 @@ tags # DS_Store (MacOS) .DS_Store -*.mp4 -hub/* \ No newline at end of file +# RL pipelines may produce mp4 outputs +*.mp4 \ No newline at end of file diff --git a/examples/diffuser/helpers.py b/examples/diffuser/helpers.py deleted file mode 100644 index 3d873e4112dc..000000000000 --- a/examples/diffuser/helpers.py +++ /dev/null @@ -1,267 +0,0 @@ -import os -import warnings - -import numpy as np -import torch - -import gym -import mediapy as media -import mujoco_py as mjc -import tqdm - - -DTYPE = torch.float - - -def normalize(x_in, data, key): - means = data[key].mean(axis=0) - stds = data[key].std(axis=0) - return (x_in - means) / stds - - -def de_normalize(x_in, data, key): - means = data[key].mean(axis=0) - stds = data[key].std(axis=0) - return x_in * stds + means - - -def to_torch(x_in, dtype=None, device="cuda"): - dtype = dtype or DTYPE - device = device - if type(x_in) is dict: - return {k: to_torch(v, dtype, device) for k, v in x_in.items()} - elif torch.is_tensor(x_in): - return x_in.to(device).type(dtype) - return torch.tensor(x_in, dtype=dtype, device=device) - - -def 
reset_x0(x_in, cond, act_dim): - for key, val in cond.items(): - x_in[:, key, act_dim:] = val.clone() - return x_in - - -def run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim, config): - y = None - for i in tqdm.tqdm(scheduler.timesteps): - # create batch of timesteps to pass into model - timesteps = torch.full((config["n_samples"],), i, device=config["device"], dtype=torch.long) - # 3. call the sample function - for _ in range(config["n_guide_steps"]): - with torch.enable_grad(): - x.requires_grad_() - y = network(x, timesteps).sample - grad = torch.autograd.grad([y.sum()], [x])[0] - if config["scale_grad_by_std"]: - posterior_variance = scheduler._get_variance(i) - model_std = torch.exp(0.5 * posterior_variance) - grad = model_std * grad - grad[timesteps < config["t_grad_cutoff"]] = 0 - x = x.detach() - x = x + config["scale"] * grad - x = reset_x0(x, conditions, action_dim) - # with torch.no_grad(): - prev_x = unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1) - x = scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"] - - # 3. [optional] add posterior noise to the sample - if config["eta"] > 0: - noise = torch.randn(x.shape).to(x.device) - posterior_variance = scheduler._get_variance(i) # * noise - # no noise when t == 0 - # NOTE: original implementation missing sqrt on posterior_variance - x = x + int(i > 0) * (0.5 * posterior_variance) * config["eta"] * noise # MJ had as log var, exponentiated - - # 4. apply conditions to the trajectory - x = reset_x0(x, conditions, action_dim) - x = to_torch(x, device=config["device"]) - # y = network(x, timesteps).sample - return x, y - - -def to_np(x_in): - if torch.is_tensor(x_in): - x_in = x_in.detach().cpu().numpy() - return x_in - - -# from MJ's Diffuser code -# https://github.com/jannerm/diffuser/blob/76ae49ae85ba1c833bf78438faffdc63b8b4d55d/diffuser/utils/colab.py#L79 -def mkdir(savepath): - """ - returns `True` iff `savepath` is created - """ - if not os.path.exists(savepath): - os.makedirs(savepath) - return True - else: - return False - - -def show_sample(renderer, observations, filename="sample.mp4", savebase="videos"): - """ - observations : [ batch_size x horizon x observation_dim ] - """ - - mkdir(savebase) - savepath = os.path.join(savebase, filename) - - images = [] - for rollout in observations: - # [ horizon x height x width x channels ] - img = renderer._renders(rollout, partial=True) - images.append(img) - - # [ horizon x height x (batch_size * width) x channels ] - images = np.concatenate(images, axis=2) - media.write_video(savepath, images, fps=60) - media.show_video(images, codec="h264", fps=60) - return images - - -# Code adapted from Michael Janner -# source: https://github.com/jannerm/diffuser/blob/main/diffuser/utils/rendering.py - - -def env_map(env_name): - """ - map D4RL dataset names to custom fully-observed - variants for rendering - """ - if "halfcheetah" in env_name: - return "HalfCheetahFullObs-v2" - elif "hopper" in env_name: - return "HopperFullObs-v2" - elif "walker2d" in env_name: - return "Walker2dFullObs-v2" - else: - return env_name - - -def get_image_mask(img): - background = (img == 255).all(axis=-1, keepdims=True) - mask = ~background.repeat(3, axis=-1) - return mask - - -def atmost_2d(x): - while x.ndim > 2: - x = x.squeeze(0) - return x - - -def set_state(env, state): - qpos_dim = env.sim.data.qpos.size - qvel_dim = env.sim.data.qvel.size - if not state.size == qpos_dim + qvel_dim: - warnings.warn( - f"[ utils/rendering ] Expected state of size {qpos_dim 
+ qvel_dim}, but got state of size {state.size}" - ) - state = state[: qpos_dim + qvel_dim] - - env.set_state(state[:qpos_dim], state[qpos_dim:]) - - -class MuJoCoRenderer: - """ - default mujoco renderer - """ - - def __init__(self, env): - if type(env) is str: - env = env_map(env) - self.env = gym.make(env) - else: - self.env = env - # - 1 because the envs in renderer are fully-observed - # @TODO : clean up - self.observation_dim = np.prod(self.env.observation_space.shape) - 1 - self.action_dim = np.prod(self.env.action_space.shape) - try: - self.viewer = mjc.MjRenderContextOffscreen(self.env.sim) - except: - print("[ utils/rendering ] Warning: could not initialize offscreen renderer") - self.viewer = None - - def pad_observation(self, observation): - state = np.concatenate( - [ - np.zeros(1), - observation, - ] - ) - return state - - def pad_observations(self, observations): - qpos_dim = self.env.sim.data.qpos.size - # xpos is hidden - xvel_dim = qpos_dim - 1 - xvel = observations[:, xvel_dim] - xpos = np.cumsum(xvel) * self.env.dt - states = np.concatenate( - [ - xpos[:, None], - observations, - ], - axis=-1, - ) - return states - - def render(self, observation, dim=256, partial=False, qvel=True, render_kwargs=None, conditions=None): - if type(dim) == int: - dim = (dim, dim) - - if self.viewer is None: - return np.zeros((*dim, 3), np.uint8) - - if render_kwargs is None: - xpos = observation[0] if not partial else 0 - render_kwargs = {"trackbodyid": 2, "distance": 3, "lookat": [xpos, -0.5, 1], "elevation": -20} - - for key, val in render_kwargs.items(): - if key == "lookat": - self.viewer.cam.lookat[:] = val[:] - else: - setattr(self.viewer.cam, key, val) - - if partial: - state = self.pad_observation(observation) - else: - state = observation - - qpos_dim = self.env.sim.data.qpos.size - if not qvel or state.shape[-1] == qpos_dim: - qvel_dim = self.env.sim.data.qvel.size - state = np.concatenate([state, np.zeros(qvel_dim)]) - - set_state(self.env, state) - - self.viewer.render(*dim) - data = self.viewer.read_pixels(*dim, depth=False) - data = data[::-1, :, :] - return data - - def _renders(self, observations, **kwargs): - images = [] - for observation in observations: - img = self.render(observation, **kwargs) - images.append(img) - return np.stack(images, axis=0) - - def renders(self, samples, partial=False, **kwargs): - if partial: - samples = self.pad_observations(samples) - partial = False - - sample_images = self._renders(samples, partial=partial, **kwargs) - - composite = np.ones_like(sample_images[0]) * 255 - - for img in sample_images: - mask = get_image_mask(img) - composite[mask] = img[mask] - - return composite - - def __call__(self, *args, **kwargs): - return self.renders(*args, **kwargs) diff --git a/examples/diffuser/run_diffuser.py b/examples/diffuser/run_diffuser.py index 80eb8f20dadd..ce11363bbbb9 100644 --- a/examples/diffuser/run_diffuser.py +++ b/examples/diffuser/run_diffuser.py @@ -3,7 +3,7 @@ import d4rl # noqa import gym -import helpers +import train_diffuser import tqdm from diffusers import DDPMScheduler, UNet1DModel @@ -25,30 +25,6 @@ num_inference_steps = 100 # number of difusion steps -def normalize(x_in, data, key): - upper = np.max(data[key], axis=0) - lower = np.min(data[key], axis=0) - x_out = 2 * (x_in - lower) / (upper - lower) - 1 - return x_out - - -def de_normalize(x_in, data, key): - upper = np.max(data[key], axis=0) - lower = np.min(data[key], axis=0) - x_out = lower + (upper - lower) * (1 + x_in) / 2 - return x_out - - -def to_torch(x_in, 
dtype=None, device=None): - dtype = dtype or DTYPE - device = device or DEVICE - if type(x_in) is dict: - return {k: to_torch(v, dtype, device) for k, v in x_in.items()} - elif torch.is_tensor(x_in): - return x_in.to(device).type(dtype) - return torch.tensor(x_in, dtype=dtype, device=device) - - # Two generators for different parts of the diffusion loop to work in colab generator_cpu = torch.Generator(device="cpu") @@ -61,12 +37,6 @@ def to_torch(x_in, dtype=None, device=None): # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) -def reset_x0(x_in, cond, act_dim): - for key, val in cond.items(): - x_in[:, key, act_dim:] = val.clone() - return x_in - - # network specific constants for inference clip_denoised = network.clip_denoised predict_epsilon = network.predict_epsilon @@ -84,9 +54,9 @@ def reset_x0(x_in, cond, act_dim): obs_raw = obs # normalize observations for forward passes - obs = normalize(obs, data, "observations") + obs = train_diffuser.normalize(obs, data, "observations") obs = obs[None].repeat(n_samples, axis=0) - conditions = {0: to_torch(obs, device=DEVICE)} + conditions = {0: train_diffuser.to_torch(obs, device=DEVICE)} # constants for inference batch_size = len(conditions[0]) @@ -98,10 +68,10 @@ def reset_x0(x_in, cond, act_dim): # this model is conditioned from an initial state, so you will see this function # multiple times to change the initial state of generated data to the state # generated via env.reset() above or env.step() below - x = reset_x0(x1, conditions, action_dim) + x = train_diffuser.reset_x0(x1, conditions, action_dim) # convert a np observation to torch for model forward pass - x = to_torch(x) + x = train_diffuser.to_torch(x) eta = 1.0 # noise factor for sampling reconstructed state @@ -129,14 +99,14 @@ def reset_x0(x_in, cond, act_dim): ) # MJ had as log var, exponentiated # 4. 
apply conditions to the trajectory - obs_reconstruct_postcond = reset_x0(obs_reconstruct, conditions, action_dim) - x = to_torch(obs_reconstruct_postcond) - plans = helpers.to_np(x[:, :, :action_dim]) + obs_reconstruct_postcond = train_diffuser.reset_x0(obs_reconstruct, conditions, action_dim) + x = train_diffuser.to_torch(obs_reconstruct_postcond) + plans = train_diffuser.helpers.to_np(x[:, :, :action_dim]) # select random plan idx = np.random.randint(plans.shape[0]) # select action at correct time action = plans[idx, 0, :] - actions = de_normalize(action, data, "actions") + actions = train_diffuser.de_normalize(action, data, "actions") # execute action in environment next_observation, reward, terminal, _ = env.step(action) @@ -151,5 +121,5 @@ def reset_x0(x_in, cond, act_dim): pass print(f"Total reward: {total_reward}") -render = helpers.MuJoCoRenderer(env) -helpers.show_sample(render, np.expand_dims(np.stack(rollout), axis=0)) +render = train_diffuser.MuJoCoRenderer(env) +train_diffuser.show_sample(render, np.expand_dims(np.stack(rollout), axis=0)) diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 31dbc8536f69..cc61650ddcdf 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -3,10 +3,9 @@ import d4rl # noqa import gym -import helpers +import train_diffuser import tqdm from diffusers import DDPMScheduler, UNet1DModel, ValueFunction -from helpers import MuJoCoRenderer, show_sample config = dict( @@ -26,7 +25,7 @@ def _run(): env_name = "hopper-medium-v2" env = gym.make(env_name) data = env.get_dataset() # dataset is only used for normalization in this colab - render = MuJoCoRenderer(env) + render = train_diffuser.MuJoCoRenderer(env) # Cuda settings for colab # torch.cuda.get_device_name(0) @@ -70,10 +69,10 @@ def _run(): for t in tqdm.tqdm(range(T)): # 1. Call the policy # normalize observations for forward passes - obs = helpers.normalize(obs, data, "observations") + obs = train_diffuser.normalize(obs, data, "observations") obs = obs[None].repeat(config["n_samples"], axis=0) - conditions = {0: helpers.to_torch(obs, device=DEVICE)} + conditions = {0: train_diffuser.to_torch(obs, device=DEVICE)} # 2. 
Call the diffusion model # constants for inference @@ -86,11 +85,11 @@ def _run(): # this model is conditioned from an initial state, so you will see this function # multiple times to change the initial state of generated data to the state # generated via env.reset() above or env.step() below - x = helpers.reset_x0(x1, conditions, action_dim) + x = train_diffuser.reset_x0(x1, conditions, action_dim) # convert a np observation to torch for model forward pass - x = helpers.to_torch(x, device=DEVICE) - x, y = helpers.run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim, config) + x = train_diffuser.to_torch(x, device=DEVICE) + x, y = train_diffuser.run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim, config) if y is not None: sorted_idx = y.argsort(0, descending=True).squeeze() y_maxes.append(y[sorted_idx[0]].detach().cpu().numpy()) @@ -100,11 +99,11 @@ def _run(): actions = sorted_values[:, :, :action_dim] if t % 10 == 0: trajectory = sorted_values[:, :, action_dim:][0].unsqueeze(0).detach().cpu().numpy() - trajectory = helpers.de_normalize(trajectory, data, "observations") + trajectory = train_diffuser.de_normalize(trajectory, data, "observations") trajectories.append(trajectory) actions = actions.detach().cpu().numpy() - denorm_actions = helpers.de_normalize(actions, data, key="actions") + denorm_actions = train_diffuser.de_normalize(actions, data, key="actions") # denorm_actions = denorm_actions[np.random.randint(config['n_samples']), 0] denorm_actions = denorm_actions[0, 0] @@ -127,7 +126,7 @@ def _run(): print(f"Total reward: {total_reward}") - show_sample(render, np.expand_dims(np.stack(rollout), axis=0)) + train_diffuser.show_sample(render, np.expand_dims(np.stack(rollout), axis=0)) def run(): diff --git a/examples/diffuser/train_diffuser.py b/examples/diffuser/train_diffuser.py index dd226fe3c813..784ba4d48d52 100644 --- a/examples/diffuser/train_diffuser.py +++ b/examples/diffuser/train_diffuser.py @@ -1,45 +1,38 @@ +import os +import warnings + import numpy as np import torch +import mediapy as media +import mujoco_py as mjc +import tqdm import d4rl # noqa import gym from diffusers import DDPMScheduler, UNet1DModel -env_name = "hopper-medium-expert-v2" -env = gym.make(env_name) -data = env.get_dataset() # dataset is only used for normalization in this colab +# Define some helper functions -# Cuda settings for colab -# torch.cuda.get_device_name(0) -DEVICE = "cpu" -DTYPE = torch.float -# diffusion model settings -n_samples = 4 # number of trajectories planned via diffusion -horizon = 128 # length of sampled trajectories -state_dim = env.observation_space.shape[0] -action_dim = env.action_space.shape[0] -num_inference_steps = 100 # number of difusion steps +DTYPE = torch.float def normalize(x_in, data, key): - upper = np.max(data[key], axis=0) - lower = np.min(data[key], axis=0) - x_out = 2 * (x_in - lower) / (upper - lower) - 1 - return x_out + means = data[key].mean(axis=0) + stds = data[key].std(axis=0) + return (x_in - means) / stds def de_normalize(x_in, data, key): - upper = np.max(data[key], axis=0) - lower = np.min(data[key], axis=0) - x_out = lower + (upper - lower) * (1 + x_in) / 2 - return x_out + means = data[key].mean(axis=0) + stds = data[key].std(axis=0) + return x_in * stds + means -def to_torch(x_in, dtype=None, device=None): +def to_torch(x_in, dtype=None, device="cuda"): dtype = dtype or DTYPE - device = device or DEVICE + device = device if type(x_in) is dict: return {k: to_torch(v, dtype, device) for k, v in 
x_in.items()} elif torch.is_tensor(x_in): @@ -47,6 +40,254 @@ def to_torch(x_in, dtype=None, device=None): return torch.tensor(x_in, dtype=dtype, device=device) +def reset_x0(x_in, cond, act_dim): + for key, val in cond.items(): + x_in[:, key, act_dim:] = val.clone() + return x_in + + +def run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim, config): + y = None + for i in tqdm.tqdm(scheduler.timesteps): + # create batch of timesteps to pass into model + timesteps = torch.full((config["n_samples"],), i, device=config["device"], dtype=torch.long) + # 3. call the sample function + for _ in range(config["n_guide_steps"]): + with torch.enable_grad(): + x.requires_grad_() + y = network(x, timesteps).sample + grad = torch.autograd.grad([y.sum()], [x])[0] + if config["scale_grad_by_std"]: + posterior_variance = scheduler._get_variance(i) + model_std = torch.exp(0.5 * posterior_variance) + grad = model_std * grad + grad[timesteps < config["t_grad_cutoff"]] = 0 + x = x.detach() + x = x + config["scale"] * grad + x = reset_x0(x, conditions, action_dim) + # with torch.no_grad(): + prev_x = unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1) + x = scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"] + + # 3. [optional] add posterior noise to the sample + if config["eta"] > 0: + noise = torch.randn(x.shape).to(x.device) + posterior_variance = scheduler._get_variance(i) # * noise + # no noise when t == 0 + # NOTE: original implementation missing sqrt on posterior_variance + x = x + int(i > 0) * (0.5 * posterior_variance) * config["eta"] * noise # MJ had as log var, exponentiated + + # 4. apply conditions to the trajectory + x = reset_x0(x, conditions, action_dim) + x = to_torch(x, device=config["device"]) + # y = network(x, timesteps).sample + return x, y + + +def to_np(x_in): + if torch.is_tensor(x_in): + x_in = x_in.detach().cpu().numpy() + return x_in + + +# from MJ's Diffuser code +# https://github.com/jannerm/diffuser/blob/76ae49ae85ba1c833bf78438faffdc63b8b4d55d/diffuser/utils/colab.py#L79 +def mkdir(savepath): + """ + returns `True` iff `savepath` is created + """ + if not os.path.exists(savepath): + os.makedirs(savepath) + return True + else: + return False + + +def show_sample(renderer, observations, filename="sample.mp4", savebase="videos"): + """ + observations : [ batch_size x horizon x observation_dim ] + """ + + mkdir(savebase) + savepath = os.path.join(savebase, filename) + + images = [] + for rollout in observations: + # [ horizon x height x width x channels ] + img = renderer._renders(rollout, partial=True) + images.append(img) + + # [ horizon x height x (batch_size * width) x channels ] + images = np.concatenate(images, axis=2) + media.write_video(savepath, images, fps=60) + media.show_video(images, codec="h264", fps=60) + return images + + +# Code adapted from Michael Janner +# source: https://github.com/jannerm/diffuser/blob/main/diffuser/utils/rendering.py + + +def env_map(env_name): + """ + map D4RL dataset names to custom fully-observed + variants for rendering + """ + if "halfcheetah" in env_name: + return "HalfCheetahFullObs-v2" + elif "hopper" in env_name: + return "HopperFullObs-v2" + elif "walker2d" in env_name: + return "Walker2dFullObs-v2" + else: + return env_name + + +def get_image_mask(img): + background = (img == 255).all(axis=-1, keepdims=True) + mask = ~background.repeat(3, axis=-1) + return mask + + +def atmost_2d(x): + while x.ndim > 2: + x = x.squeeze(0) + return x + + +def set_state(env, state): + qpos_dim = 
env.sim.data.qpos.size + qvel_dim = env.sim.data.qvel.size + if not state.size == qpos_dim + qvel_dim: + warnings.warn( + f"[ utils/rendering ] Expected state of size {qpos_dim + qvel_dim}, but got state of size {state.size}" + ) + state = state[: qpos_dim + qvel_dim] + + env.set_state(state[:qpos_dim], state[qpos_dim:]) + + +class MuJoCoRenderer: + """ + default mujoco renderer + """ + + def __init__(self, env): + if type(env) is str: + env = env_map(env) + self.env = gym.make(env) + else: + self.env = env + # - 1 because the envs in renderer are fully-observed + # @TODO : clean up + self.observation_dim = np.prod(self.env.observation_space.shape) - 1 + self.action_dim = np.prod(self.env.action_space.shape) + try: + self.viewer = mjc.MjRenderContextOffscreen(self.env.sim) + except: + print("[ utils/rendering ] Warning: could not initialize offscreen renderer") + self.viewer = None + + def pad_observation(self, observation): + state = np.concatenate( + [ + np.zeros(1), + observation, + ] + ) + return state + + def pad_observations(self, observations): + qpos_dim = self.env.sim.data.qpos.size + # xpos is hidden + xvel_dim = qpos_dim - 1 + xvel = observations[:, xvel_dim] + xpos = np.cumsum(xvel) * self.env.dt + states = np.concatenate( + [ + xpos[:, None], + observations, + ], + axis=-1, + ) + return states + + def render(self, observation, dim=256, partial=False, qvel=True, render_kwargs=None, conditions=None): + if type(dim) == int: + dim = (dim, dim) + + if self.viewer is None: + return np.zeros((*dim, 3), np.uint8) + + if render_kwargs is None: + xpos = observation[0] if not partial else 0 + render_kwargs = {"trackbodyid": 2, "distance": 3, "lookat": [xpos, -0.5, 1], "elevation": -20} + + for key, val in render_kwargs.items(): + if key == "lookat": + self.viewer.cam.lookat[:] = val[:] + else: + setattr(self.viewer.cam, key, val) + + if partial: + state = self.pad_observation(observation) + else: + state = observation + + qpos_dim = self.env.sim.data.qpos.size + if not qvel or state.shape[-1] == qpos_dim: + qvel_dim = self.env.sim.data.qvel.size + state = np.concatenate([state, np.zeros(qvel_dim)]) + + set_state(self.env, state) + + self.viewer.render(*dim) + data = self.viewer.read_pixels(*dim, depth=False) + data = data[::-1, :, :] + return data + + def _renders(self, observations, **kwargs): + images = [] + for observation in observations: + img = self.render(observation, **kwargs) + images.append(img) + return np.stack(images, axis=0) + + def renders(self, samples, partial=False, **kwargs): + if partial: + samples = self.pad_observations(samples) + partial = False + + sample_images = self._renders(samples, partial=partial, **kwargs) + + composite = np.ones_like(sample_images[0]) * 255 + + for img in sample_images: + mask = get_image_mask(img) + composite[mask] = img[mask] + + return composite + + def __call__(self, *args, **kwargs): + return self.renders(*args, **kwargs) + + +env_name = "hopper-medium-expert-v2" +env = gym.make(env_name) +data = env.get_dataset() # dataset is only used for normalization in this colab + +# Cuda settings for colab +# torch.cuda.get_device_name(0) +DEVICE = "cpu" +DTYPE = torch.float + +# diffusion model settings +n_samples = 4 # number of trajectories planned via diffusion +horizon = 128 # length of sampled trajectories +state_dim = env.observation_space.shape[0] +action_dim = env.action_space.shape[0] +num_inference_steps = 100 # number of difusion steps + obs = env.reset() obs_raw = obs @@ -67,16 +308,5 @@ def to_torch(x_in, dtype=None, device=None): 
weight_decay=1e-6, eps=1e-8, ) -# 3 different pretrained models are available for this task. -# The horizion represents the length of trajectories used in training. -# network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor256").to(device=DEVICE) -# network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) - - -def reset_x0(x_in, cond, act_dim): - for key, val in cond.items(): - x_in[:, key, act_dim:] = val.clone() - return x_in - # TODO: Flesh this out using accelerate library (a la other examples) diff --git a/convert_model.py b/scripts/convert_models_diffuser_to_diffusers.py similarity index 100% rename from convert_model.py rename to scripts/convert_models_diffuser_to_diffusers.py diff --git a/src/diffusers/schedulers/scheduling_ddpm.py b/src/diffusers/schedulers/scheduling_ddpm.py index 9072710886b8..06596bd6091f 100644 --- a/src/diffusers/schedulers/scheduling_ddpm.py +++ b/src/diffusers/schedulers/scheduling_ddpm.py @@ -200,6 +200,7 @@ def _get_variance(self, t, predicted_variance=None, variance_type=None): # for rl-diffuser https://arxiv.org/abs/2205.09991 elif variance_type == "fixed_small_log": variance = torch.log(torch.clamp(variance, min=1e-20)) + variance = torch.exp(0.5 * variance) elif variance_type == "fixed_large": variance = self.betas[t] elif variance_type == "fixed_large_log": @@ -284,10 +285,7 @@ def step( model_output.size(), dtype=model_output.dtype, layout=model_output.layout, generator=generator ).to(model_output.device) if self.variance_type == "fixed_small_log": - variance = self._get_variance(t, predicted_variance=predicted_variance) - variance = torch.exp(0.5 * variance) - variance = variance * noise - + variance = self._get_variance(t, predicted_variance=predicted_variance) * noise else: variance = (self._get_variance(t, predicted_variance=predicted_variance) ** 0.5) * noise From e7e6963fdcf0dc761c709b018edbc699bf0454e6 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Tue, 18 Oct 2022 19:19:28 -0400 Subject: [PATCH 23/32] style and quality --- examples/diffuser/run_diffuser.py | 2 +- .../diffuser/run_diffuser_value_guided.py | 2 +- examples/diffuser/train_diffuser.py | 4 +-- .../convert_models_diffuser_to_diffusers.py | 27 ++++++++++++++----- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/examples/diffuser/run_diffuser.py b/examples/diffuser/run_diffuser.py index ce11363bbbb9..ad35b59d99a7 100644 --- a/examples/diffuser/run_diffuser.py +++ b/examples/diffuser/run_diffuser.py @@ -3,8 +3,8 @@ import d4rl # noqa import gym -import train_diffuser import tqdm +import train_diffuser from diffusers import DDPMScheduler, UNet1DModel diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index cc61650ddcdf..05c10be374c6 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -3,8 +3,8 @@ import d4rl # noqa import gym -import train_diffuser import tqdm +import train_diffuser from diffusers import DDPMScheduler, UNet1DModel, ValueFunction diff --git a/examples/diffuser/train_diffuser.py b/examples/diffuser/train_diffuser.py index 784ba4d48d52..4f5e7bd0b680 100644 --- a/examples/diffuser/train_diffuser.py +++ b/examples/diffuser/train_diffuser.py @@ -4,11 +4,11 @@ import numpy as np import torch +import d4rl # noqa +import gym import mediapy as media import mujoco_py as mjc import tqdm -import d4rl # noqa -import gym from diffusers import DDPMScheduler, UNet1DModel diff --git 
a/scripts/convert_models_diffuser_to_diffusers.py b/scripts/convert_models_diffuser_to_diffusers.py index 4691c69239d7..821c6d51fb80 100644 --- a/scripts/convert_models_diffuser_to_diffusers.py +++ b/scripts/convert_models_diffuser_to_diffusers.py @@ -1,13 +1,17 @@ +import json +import os import torch -from diffusers import DDPMScheduler, UNet1DModel, ValueFunction -import os -import json + +from diffusers import UNet1DModel, ValueFunction + + os.makedirs("hub/hopper-medium-v2/unet/hor32", exist_ok=True) os.makedirs("hub/hopper-medium-v2/unet/hor128", exist_ok=True) os.makedirs("hub/hopper-medium-v2/value_function", exist_ok=True) + def unet(hor): if hor == 128: down_block_types = ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D") @@ -20,7 +24,12 @@ def unet(hor): up_block_types = ("UpResnetBlock1D", "UpResnetBlock1D", "UpResnetBlock1D") model = torch.load(f"/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-mediumv2-hor{hor}.torch") state_dict = model.state_dict() - config = dict(down_block_types=down_block_types, block_out_channels=block_out_channels, up_block_types=up_block_types, layers_per_block=1) + config = dict( + down_block_types=down_block_types, + block_out_channels=block_out_channels, + up_block_types=up_block_types, + layers_per_block=1, + ) hf_value_function = UNet1DModel(**config) print(f"length of state dict: {len(state_dict.keys())}") print(f"length of value function dict: {len(hf_value_function.state_dict().keys())}") @@ -33,8 +42,14 @@ def unet(hor): with open(f"hub/hopper-medium-v2/unet/hor{hor}/config.json", "w") as f: json.dump(config, f) + def value_function(): - config = dict(in_channels=14, down_block_types=("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"), block_out_channels=(32, 64, 128, 256), layers_per_block=1) + config = dict( + in_channels=14, + down_block_types=("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"), + block_out_channels=(32, 64, 128, 256), + layers_per_block=1, + ) model = torch.load("/Users/bglickenhaus/Documents/diffuser/value_function-hopper-mediumv2-hor32.torch") state_dict = model @@ -56,4 +71,4 @@ def value_function(): if __name__ == "__main__": unet(32) # unet(128) - value_function() \ No newline at end of file + value_function() From 4f77d892ab6690fd84fed069e01ddf211fed65f2 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Wed, 19 Oct 2022 13:39:34 -0400 Subject: [PATCH 24/32] remove unused var --- examples/diffuser/run_diffuser_value_guided.py | 5 +---- examples/diffuser/train_diffuser.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 05c10be374c6..9610a07c366f 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -36,9 +36,6 @@ def _run(): action_dim = env.action_space.shape[0] # Two generators for different parts of the diffusion loop to work in colab - # generator = torch.Generator(device='cuda') - generator = torch.Generator(device=DEVICE) - scheduler = DDPMScheduler( num_train_timesteps=config["num_inference_steps"], beta_schedule="squaredcos_cap_v2", @@ -89,7 +86,7 @@ def _run(): # convert a np observation to torch for model forward pass x = train_diffuser.to_torch(x, device=DEVICE) - x, y = train_diffuser.run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim, config) + x, y = train_diffuser.run_diffusion(x, scheduler, network, unet, 
conditions, action_dim, config) if y is not None: sorted_idx = y.argsort(0, descending=True).squeeze() y_maxes.append(y[sorted_idx[0]].detach().cpu().numpy()) diff --git a/examples/diffuser/train_diffuser.py b/examples/diffuser/train_diffuser.py index 4f5e7bd0b680..b063a0456d97 100644 --- a/examples/diffuser/train_diffuser.py +++ b/examples/diffuser/train_diffuser.py @@ -46,7 +46,7 @@ def reset_x0(x_in, cond, act_dim): return x_in -def run_diffusion(x, scheduler, generator, network, unet, conditions, action_dim, config): +def run_diffusion(x, scheduler, network, unet, conditions, action_dim, config): y = None for i in tqdm.tqdm(scheduler.timesteps): # create batch of timesteps to pass into model From 6bd8397e93b8db272295309ad709c0a8f4127843 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Thu, 20 Oct 2022 14:51:15 -0400 Subject: [PATCH 25/32] hack unet1d into a value function --- .../convert_models_diffuser_to_diffusers.py | 9 ++- src/diffusers/models/unet_1d.py | 55 ++++++++----- src/diffusers/models/unet_1d_blocks.py | 79 ++++++++++++++++++- 3 files changed, 118 insertions(+), 25 deletions(-) diff --git a/scripts/convert_models_diffuser_to_diffusers.py b/scripts/convert_models_diffuser_to_diffusers.py index 821c6d51fb80..e957f1204596 100644 --- a/scripts/convert_models_diffuser_to_diffusers.py +++ b/scripts/convert_models_diffuser_to_diffusers.py @@ -3,7 +3,7 @@ import torch -from diffusers import UNet1DModel, ValueFunction +from diffusers import UNet1DModel os.makedirs("hub/hopper-medium-v2/unet/hor32", exist_ok=True) @@ -47,13 +47,16 @@ def value_function(): config = dict( in_channels=14, down_block_types=("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"), + up_block_types=(), + out_block_type="ValueFunction", block_out_channels=(32, 64, 128, 256), layers_per_block=1, + always_downsample=True ) model = torch.load("/Users/bglickenhaus/Documents/diffuser/value_function-hopper-mediumv2-hor32.torch") state_dict = model - hf_value_function = ValueFunction(**config) + hf_value_function = UNet1DModel(**config) print(f"length of state dict: {len(state_dict.keys())}") print(f"length of value function dict: {len(hf_value_function.state_dict().keys())}") @@ -69,6 +72,6 @@ def value_function(): if __name__ == "__main__": - unet(32) + # unet(32) # unet(128) value_function() diff --git a/src/diffusers/models/unet_1d.py b/src/diffusers/models/unet_1d.py index 3ede756c9b3d..d0483609fead 100644 --- a/src/diffusers/models/unet_1d.py +++ b/src/diffusers/models/unet_1d.py @@ -18,7 +18,7 @@ import torch.nn as nn from diffusers.models.resnet import ResidualTemporalBlock1D -from diffusers.models.unet_1d_blocks import get_down_block, get_up_block +from diffusers.models.unet_1d_blocks import get_down_block, get_mid_block, get_out_block, get_up_block from ..configuration_utils import ConfigMixin, register_to_config from ..modeling_utils import ModelMixin @@ -62,10 +62,13 @@ def __init__( out_channels: int = 14, down_block_types: Tuple[str] = ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"), up_block_types: Tuple[str] = ("UpResnetBlock1D", "UpResnetBlock1D"), + mid_block_types: Tuple[str] = ("MidResTemporalBlock1D", "MidResTemporalBlock1D"), + out_block_type: str = "OutConv1DBlock", block_out_channels: Tuple[int] = (32, 128, 256), act_fn: str = "mish", norm_num_groups: int = 8, layers_per_block: int = 1, + always_downsample: bool = False, ): super().__init__() @@ -95,14 +98,30 @@ def __init__( in_channels=input_channel, out_channels=output_channel, 
temb_channels=block_out_channels[0], - add_downsample=not is_final_block, + add_downsample=not is_final_block or always_downsample, ) self.down_blocks.append(down_block) # mid - self.mid_block1 = ResidualTemporalBlock1D(mid_dim, mid_dim, embed_dim=block_out_channels[0]) - self.mid_block2 = ResidualTemporalBlock1D(mid_dim, mid_dim, embed_dim=block_out_channels[0]) - + self.mid_blocks = nn.ModuleList([]) + for i, mid_block_type in enumerate(mid_block_types): + if always_downsample: + mid_block = get_mid_block( + mid_block_type, + in_channels=mid_dim // (i + 1), + out_channels=mid_dim // ((i + 1) * 2), + embed_dim=block_out_channels[0], + add_downsample=True, + ) + else: + mid_block = get_mid_block( + mid_block_type, + in_channels=mid_dim, + out_channels=mid_dim, + embed_dim=block_out_channels[0], + add_downsample=False, + ) + self.mid_blocks.append(mid_block) # up reversed_block_out_channels = list(reversed(block_out_channels)) for i, up_block_type in enumerate(up_block_types): @@ -123,13 +142,14 @@ def __init__( # out num_groups_out = norm_num_groups if norm_num_groups is not None else min(block_out_channels[0] // 4, 32) - self.final_conv1d_1 = nn.Conv1d(block_out_channels[0], block_out_channels[0], 5, padding=2) - self.final_conv1d_gn = nn.GroupNorm(num_groups_out, block_out_channels[0]) - if act_fn == "silu": - self.final_conv1d_act = nn.SiLU() - if act_fn == "mish": - self.final_conv1d_act = nn.Mish() - self.final_conv1d_2 = nn.Conv1d(block_out_channels[0], out_channels, 1) + self.out_block = get_out_block( + out_block_type=out_block_type, + num_groups_out=num_groups_out, + embed_dim=block_out_channels[0], + out_channels=out_channels, + act_fn=act_fn, + fc_dim=mid_dim // 4, + ) def forward( self, @@ -166,20 +186,15 @@ def forward( down_block_res_samples.append(res_samples[0]) # 3. mid - sample = self.mid_block1(sample, temb) - sample = self.mid_block2(sample, temb) + for mid_block in self.mid_blocks: + sample = mid_block(sample, temb) # 4. up for up_block in self.up_blocks: sample = up_block(hidden_states=sample, res_hidden_states=down_block_res_samples.pop(), temb=temb) # 5. post-process - sample = self.final_conv1d_1(sample) - sample = rearrange_dims(sample) - sample = self.final_conv1d_gn(sample) - sample = rearrange_dims(sample) - sample = self.final_conv1d_act(sample) - sample = self.final_conv1d_2(sample) + sample = self.out_block(sample, temb) if not return_dict: return (sample,) diff --git a/src/diffusers/models/unet_1d_blocks.py b/src/diffusers/models/unet_1d_blocks.py index 40e25fb43afb..65a4afbdfc68 100644 --- a/src/diffusers/models/unet_1d_blocks.py +++ b/src/diffusers/models/unet_1d_blocks.py @@ -13,11 +13,12 @@ # limitations under the License. 
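# The blocks added below let UNet1DModel double as a value function:
# MidResTemporalBlock1D wraps ResidualTemporalBlock1D with an optional Downsample1D,
# OutConv1DBlock keeps the original convolutional output head, and
# OutValueFunctionBlock flattens the hidden states, concatenates the time embedding,
# and maps the result through Linear/Mish/Linear layers to one scalar value per sample.
# get_mid_block and get_out_block are small factories that select between these blocks.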
+from turtle import forward import torch import torch.nn.functional as F from torch import nn -from .resnet import Downsample1D, ResidualTemporalBlock1D, Upsample1D +from .resnet import Downsample1D, ResidualTemporalBlock1D, Upsample1D, rearrange_dims class DownResnetBlock1D(nn.Module): @@ -173,6 +174,66 @@ class UpBlock1DNoSkip(nn.Module): pass +class MidResTemporalBlock1D(nn.Module): + def __init__(self, in_channels, out_channels, embed_dim, add_downsample): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.add_downsample = add_downsample + self.resnet = ResidualTemporalBlock1D(in_channels, out_channels, embed_dim=embed_dim) + + if add_downsample: + self.downsample = Downsample1D(out_channels, use_conv=True) + else: + self.downsample = nn.Identity() + + def forward(self, sample, temb): + sample = self.resnet(sample, temb) + sample = self.downsample(sample) + return sample + + +class OutConv1DBlock(nn.Module): + def __init__(self, num_groups_out, embed_dim, out_channels, act_fn): + super().__init__() + self.final_conv1d_1 = nn.Conv1d(embed_dim, embed_dim, 5, padding=2) + self.final_conv1d_gn = nn.GroupNorm(num_groups_out, embed_dim) + if act_fn == "silu": + self.final_conv1d_act = nn.SiLU() + if act_fn == "mish": + self.final_conv1d_act = nn.Mish() + self.final_conv1d_2 = nn.Conv1d(embed_dim, out_channels, 1) + + def forward(self, sample, t): + sample = self.final_conv1d_1(sample) + sample = rearrange_dims(sample) + sample = self.final_conv1d_gn(sample) + sample = rearrange_dims(sample) + sample = self.final_conv1d_act(sample) + sample = self.final_conv1d_2(sample) + return sample + + +class OutValueFunctionBlock(nn.Module): + def __init__(self, fc_dim, embed_dim): + super().__init__() + self.final_block = nn.ModuleList( + [ + nn.Linear(fc_dim + embed_dim, fc_dim // 2), + nn.Mish(), + nn.Linear(fc_dim // 2, 1), + ] + ) + + def forward(self, sample, t): + sample = sample.view(sample.shape[0], -1) + sample = torch.cat((sample, t), dim=-1) + for layer in self.final_block: + sample = layer(sample) + + return sample + + def get_down_block(down_block_type, num_layers, in_channels, out_channels, temb_channels, add_downsample): if down_block_type == "DownResnetBlock1D": return DownResnetBlock1D( @@ -195,5 +256,19 @@ def get_up_block(up_block_type, num_layers, in_channels, out_channels, temb_chan temb_channels=temb_channels, add_upsample=add_upsample, ) - + elif up_block_type == "Identity": + return nn.Identity() raise ValueError(f"{up_block_type} does not exist.") + + +def get_mid_block(mid_block_type, in_channels, out_channels, embed_dim, add_downsample): + if mid_block_type == "MidResTemporalBlock1D": + return MidResTemporalBlock1D(in_channels, out_channels, embed_dim, add_downsample) + raise ValueError(f"{mid_block_type} does not exist.") + + +def get_out_block(*, out_block_type, num_groups_out, embed_dim, out_channels, act_fn, fc_dim): + if out_block_type == "OutConv1DBlock": + return OutConv1DBlock(num_groups_out, out_channels, embed_dim, act_fn) + elif out_block_type == "ValueFunction": + return OutValueFunctionBlock(fc_dim, embed_dim) From 435ad266bae7ac1a1646c1bba392733f035c8f73 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Thu, 20 Oct 2022 15:10:18 -0400 Subject: [PATCH 26/32] add pipeline --- examples/community/value_guided_diffuser.py | 88 +++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 examples/community/value_guided_diffuser.py diff --git a/examples/community/value_guided_diffuser.py 
b/examples/community/value_guided_diffuser.py new file mode 100644 index 000000000000..a90ff63c509e --- /dev/null +++ b/examples/community/value_guided_diffuser.py @@ -0,0 +1,88 @@ +import torch +from diffusers import DiffusionPipeline +import tqdm + +from diffusers.models.unet_1d import UNet1DModel +from diffusers.utils.dummy_pt_objects import DDPMScheduler + + +class ValueGuidedDiffuserPipeline(DiffusionPipeline): + def __init__(self, value_function: UNet1DModel, unet: UNet1DModel, scheduler: DDPMScheduler, env, *args, **kwargs): + super().__init__(*args, **kwargs) + self.value_function = value_function + self.unet = unet + self.scheduler = scheduler + self.env = env + self.data = env.get_dataset() + self.means = dict((key, val.mean(axis=0)) for key, val in self.data.items()) + self.stds = dict((key, val.std(axis=0)) for key, val in self.data.items()) + self.device = self.unet.device + self.state_dim = env.observation_space.shape[0] + self.action_dim = env.action_space.shape[0] + + def normalize(self, x_in, key): + return (x_in - self.means[key]) / self.stds[key] + + def de_normalize(self, x_in, key): + return x_in * self.stds[key] + self.means[key] + + def to_torch(self, x_in): + + if type(x_in) is dict: + return {k: self.to_torch(v) for k, v in x_in.items()} + elif torch.is_tensor(x_in): + return x_in.to(self.device) + return torch.tensor(x_in, device=self.device) + + def reset_x0(self, x_in, cond, act_dim): + for key, val in cond.items(): + x_in[:, key, act_dim:] = val.clone() + return x_in + + def run_diffusion(self, x, conditions, n_guide_steps, scale): + batch_size = x.shape[0] + y = None + for i in tqdm.tqdm(self.scheduler.timesteps): + # create batch of timesteps to pass into model + timesteps = torch.full((batch_size,), i, device=self.device, dtype=torch.long) + # 3. call the sample function + for _ in range(n_guide_steps): + with torch.enable_grad(): + x.requires_grad_() + y = self.value_function(x, timesteps).sample + grad = torch.autograd.grad([y.sum()], [x])[0] + + posterior_variance = self.scheduler._get_variance(i) + model_std = torch.exp(0.5 * posterior_variance) + grad = model_std * grad + grad[timesteps < 2] = 0 + x = x.detach() + x = x + scale * grad + x = self.reset_x0(x, conditions, self.action_dim) + # with torch.no_grad(): + prev_x = self.unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1) + x = self.scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"] + + # 4. 
apply conditions to the trajectory + x = self.reset_x0(x, conditions, self.action_dim) + x = self.to_torch(x, device=self.device) + # y = network(x, timesteps).sample + return x, y + + def __call__(self, obs, batch_size=64, planning_horizon=20, n_guide_steps=2, scale=0.1): + obs = self.normalize(obs, "observations") + obs = obs[None].repeat(batch_size, axis=0) + conditions = {0: self.to_torch(obs)} + shape = (batch_size, planning_horizon, self.state_dim + self.action_dim) + x1 = torch.randn(shape, device=self.device) + x = self.reset_x0(x1, conditions, self.action_dim) + x = self.to_torch(x) + x, y = self.run_diffusion(x, conditions, n_guide_steps, scale) + sorted_idx = y.argsort(0, descending=True).squeeze() + sorted_values = x[sorted_idx] + actions = sorted_values[:, :, : self.action_dim] + actions = actions.detach().cpu().numpy() + denorm_actions = self.de_normalize(actions, key="actions") + # denorm_actions = denorm_actions[np.random.randint(config['n_samples']), 0] + denorm_actions = denorm_actions[0, 0] + return denorm_actions From 56534088386ffc2c0e94fa125c10ee37494216a1 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Thu, 20 Oct 2022 15:30:50 -0400 Subject: [PATCH 27/32] fix arg order --- scripts/convert_models_diffuser_to_diffusers.py | 2 +- src/diffusers/models/unet_1d_blocks.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/convert_models_diffuser_to_diffusers.py b/scripts/convert_models_diffuser_to_diffusers.py index e957f1204596..61e05d261396 100644 --- a/scripts/convert_models_diffuser_to_diffusers.py +++ b/scripts/convert_models_diffuser_to_diffusers.py @@ -72,6 +72,6 @@ def value_function(): if __name__ == "__main__": - # unet(32) + unet(32) # unet(128) value_function() diff --git a/src/diffusers/models/unet_1d_blocks.py b/src/diffusers/models/unet_1d_blocks.py index 65a4afbdfc68..1981a34754c3 100644 --- a/src/diffusers/models/unet_1d_blocks.py +++ b/src/diffusers/models/unet_1d_blocks.py @@ -194,7 +194,7 @@ def forward(self, sample, temb): class OutConv1DBlock(nn.Module): - def __init__(self, num_groups_out, embed_dim, out_channels, act_fn): + def __init__(self, num_groups_out, out_channels, embed_dim, act_fn): super().__init__() self.final_conv1d_1 = nn.Conv1d(embed_dim, embed_dim, 5, padding=2) self.final_conv1d_gn = nn.GroupNorm(num_groups_out, embed_dim) From 149193259cc2f6b71da575aa81c29d563d15a208 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Thu, 20 Oct 2022 15:43:01 -0400 Subject: [PATCH 28/32] add pipeline to core library --- .../diffuser/run_diffuser_value_guided.py | 16 ++- src/diffusers/__init__.py | 2 +- src/diffusers/pipelines/__init__.py | 1 + src/diffusers/pipelines/diffuser/__init__.py | 1 + .../diffuser/pipeline_value_guided.py | 98 +++++++++++++++++++ 5 files changed, 113 insertions(+), 5 deletions(-) create mode 100644 src/diffusers/pipelines/diffuser/__init__.py create mode 100644 src/diffusers/pipelines/diffuser/pipeline_value_guided.py diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 9610a07c366f..a114d9d2917a 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -4,8 +4,9 @@ import d4rl # noqa import gym import tqdm -import train_diffuser -from diffusers import DDPMScheduler, UNet1DModel, ValueFunction + +# import train_diffuser +from diffusers import DDPMScheduler, UNet1DModel, DiffusionPipeline config = dict( @@ -25,7 +26,7 @@ def _run(): env_name = "hopper-medium-v2" env = 
gym.make(env_name) data = env.get_dataset() # dataset is only used for normalization in this colab - render = train_diffuser.MuJoCoRenderer(env) + # render = train_diffuser.MuJoCoRenderer(env) # Cuda settings for colab # torch.cuda.get_device_name(0) @@ -47,8 +48,15 @@ def _run(): # The horizion represents the length of trajectories used in training. # network = ValueFunction(training_horizon=horizon, dim=32, dim_mults=(1, 2, 4, 8), transition_dim=14, cond_dim=11) - network = ValueFunction.from_pretrained("bglick13/hopper-medium-v2-value-function-hor32").to(device=DEVICE).eval() + network = UNet1DModel.from_pretrained("bglick13/hopper-medium-v2-value-function-hor32").to(device=DEVICE).eval() unet = UNet1DModel.from_pretrained("bglick13/hopper-medium-v2-unet-hor32").to(device=DEVICE).eval() + pipeline = DiffusionPipeline.from_pretrained( + "bglick13/hopper-medium-v2-value-function-hor32", + value_function=network, + unet=unet, + scheduler=scheduler, + env=env, + ) # unet = UNet1DModel.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 7088e560dd66..edc97563c707 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -29,7 +29,7 @@ get_scheduler, ) from .pipeline_utils import DiffusionPipeline - from .pipelines import DDIMPipeline, DDPMPipeline, KarrasVePipeline, LDMPipeline, PNDMPipeline, ScoreSdeVePipeline + from .pipelines import DDIMPipeline, DDPMPipeline, KarrasVePipeline, LDMPipeline, PNDMPipeline, ScoreSdeVePipeline, ValueGuidedDiffuserPipeline from .schedulers import ( DDIMScheduler, DDPMScheduler, diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py index 1c31595fb0cf..8edec55188fa 100644 --- a/src/diffusers/pipelines/__init__.py +++ b/src/diffusers/pipelines/__init__.py @@ -8,6 +8,7 @@ from .pndm import PNDMPipeline from .score_sde_ve import ScoreSdeVePipeline from .stochastic_karras_ve import KarrasVePipeline + from .diffuser import ValueGuidedDiffuserPipeline else: from ..utils.dummy_pt_objects import * # noqa F403 diff --git a/src/diffusers/pipelines/diffuser/__init__.py b/src/diffusers/pipelines/diffuser/__init__.py new file mode 100644 index 000000000000..23d3e16c48d3 --- /dev/null +++ b/src/diffusers/pipelines/diffuser/__init__.py @@ -0,0 +1 @@ +from .pipeline_value_guided import ValueGuidedDiffuserPipeline diff --git a/src/diffusers/pipelines/diffuser/pipeline_value_guided.py b/src/diffusers/pipelines/diffuser/pipeline_value_guided.py new file mode 100644 index 000000000000..622ddb7cf608 --- /dev/null +++ b/src/diffusers/pipelines/diffuser/pipeline_value_guided.py @@ -0,0 +1,98 @@ +from numpy import AxisError +import torch +from diffusers import DiffusionPipeline +import tqdm + +from diffusers.models.unet_1d import UNet1DModel +from diffusers.utils.dummy_pt_objects import DDPMScheduler + + +class ValueGuidedDiffuserPipeline(DiffusionPipeline): + def __init__(self, value_function: UNet1DModel, unet: UNet1DModel, scheduler: DDPMScheduler, env): + super().__init__() + self.value_function = value_function + self.unet = unet + self.scheduler = scheduler + self.env = env + self.data = env.get_dataset() + self.means = dict() + for key, val in self.data.items(): + try: + self.means[key] = val.mean(axis=0) + except AxisError: # Not everything in the dataset is an array + pass + self.stds = dict() + for key, val in self.data.items(): + try: + 
self.stds[key] = val.std(axis=0) + except AxisError: + pass + self.state_dim = env.observation_space.shape[0] + self.action_dim = env.action_space.shape[0] + + def normalize(self, x_in, key): + return (x_in - self.means[key]) / self.stds[key] + + def de_normalize(self, x_in, key): + return x_in * self.stds[key] + self.means[key] + + def to_torch(self, x_in): + + if type(x_in) is dict: + return {k: self.to_torch(v) for k, v in x_in.items()} + elif torch.is_tensor(x_in): + return x_in.to(self.unet.device) + return torch.tensor(x_in, device=self.unet.device) + + def reset_x0(self, x_in, cond, act_dim): + for key, val in cond.items(): + x_in[:, key, act_dim:] = val.clone() + return x_in + + def run_diffusion(self, x, conditions, n_guide_steps, scale): + batch_size = x.shape[0] + y = None + for i in tqdm.tqdm(self.scheduler.timesteps): + # create batch of timesteps to pass into model + timesteps = torch.full((batch_size,), i, device=self.unet.device, dtype=torch.long) + # 3. call the sample function + for _ in range(n_guide_steps): + with torch.enable_grad(): + x.requires_grad_() + y = self.value_function(x, timesteps).sample + grad = torch.autograd.grad([y.sum()], [x])[0] + + posterior_variance = self.scheduler._get_variance(i) + model_std = torch.exp(0.5 * posterior_variance) + grad = model_std * grad + grad[timesteps < 2] = 0 + x = x.detach() + x = x + scale * grad + x = self.reset_x0(x, conditions, self.action_dim) + # with torch.no_grad(): + prev_x = self.unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1) + x = self.scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"] + + # 4. apply conditions to the trajectory + x = self.reset_x0(x, conditions, self.action_dim) + x = self.to_torch(x, device=self.unet.device) + # y = network(x, timesteps).sample + return x, y + + def __call__(self, obs, batch_size=64, planning_horizon=20, n_guide_steps=2, scale=0.1): + obs = self.normalize(obs, "observations") + obs = obs[None].repeat(batch_size, axis=0) + conditions = {0: self.to_torch(obs)} + shape = (batch_size, planning_horizon, self.state_dim + self.action_dim) + x1 = torch.randn(shape, device=self.unet.device) + x = self.reset_x0(x1, conditions, self.action_dim) + x = self.to_torch(x) + x, y = self.run_diffusion(x, conditions, n_guide_steps, scale) + sorted_idx = y.argsort(0, descending=True).squeeze() + sorted_values = x[sorted_idx] + actions = sorted_values[:, :, : self.action_dim] + actions = actions.detach().cpu().numpy() + denorm_actions = self.de_normalize(actions, key="actions") + # denorm_actions = denorm_actions[np.random.randint(config['n_samples']), 0] + denorm_actions = denorm_actions[0, 0] + return denorm_actions From 1a8098ed403d20c220e7b163881e422e0e225ceb Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Thu, 20 Oct 2022 19:52:11 -0400 Subject: [PATCH 29/32] community pipeline --- .../community/pipeline.py | 23 +++++++----- examples/community/value_guided_diffuser.py | 37 +++++++++++++------ .../diffuser/run_diffuser_value_guided.py | 3 +- src/diffusers/__init__.py | 2 +- src/diffusers/pipelines/__init__.py | 1 - src/diffusers/pipelines/diffuser/__init__.py | 1 - 6 files changed, 43 insertions(+), 24 deletions(-) rename src/diffusers/pipelines/diffuser/pipeline_value_guided.py => examples/community/pipeline.py (90%) delete mode 100644 src/diffusers/pipelines/diffuser/__init__.py diff --git a/src/diffusers/pipelines/diffuser/pipeline_value_guided.py b/examples/community/pipeline.py similarity index 90% rename from 
src/diffusers/pipelines/diffuser/pipeline_value_guided.py rename to examples/community/pipeline.py index 622ddb7cf608..5a28d6c222e6 100644 --- a/src/diffusers/pipelines/diffuser/pipeline_value_guided.py +++ b/examples/community/pipeline.py @@ -1,14 +1,19 @@ -from numpy import AxisError import torch from diffusers import DiffusionPipeline import tqdm - +from numpy import AxisError from diffusers.models.unet_1d import UNet1DModel from diffusers.utils.dummy_pt_objects import DDPMScheduler class ValueGuidedDiffuserPipeline(DiffusionPipeline): - def __init__(self, value_function: UNet1DModel, unet: UNet1DModel, scheduler: DDPMScheduler, env): + def __init__( + self, + value_function: UNet1DModel, + unet: UNet1DModel, + scheduler: DDPMScheduler, + env, + ): super().__init__() self.value_function = value_function self.unet = unet @@ -16,16 +21,16 @@ def __init__(self, value_function: UNet1DModel, unet: UNet1DModel, scheduler: DD self.env = env self.data = env.get_dataset() self.means = dict() - for key, val in self.data.items(): + for key in self.data.keys(): try: - self.means[key] = val.mean(axis=0) - except AxisError: # Not everything in the dataset is an array + self.means[key] = self.data[key].mean() + except: pass self.stds = dict() - for key, val in self.data.items(): + for key in self.data.keys(): try: - self.stds[key] = val.std(axis=0) - except AxisError: + self.stds[key] = self.data[key].std() + except: pass self.state_dim = env.observation_space.shape[0] self.action_dim = env.action_space.shape[0] diff --git a/examples/community/value_guided_diffuser.py b/examples/community/value_guided_diffuser.py index a90ff63c509e..5a28d6c222e6 100644 --- a/examples/community/value_guided_diffuser.py +++ b/examples/community/value_guided_diffuser.py @@ -1,22 +1,37 @@ import torch from diffusers import DiffusionPipeline import tqdm - +from numpy import AxisError from diffusers.models.unet_1d import UNet1DModel from diffusers.utils.dummy_pt_objects import DDPMScheduler class ValueGuidedDiffuserPipeline(DiffusionPipeline): - def __init__(self, value_function: UNet1DModel, unet: UNet1DModel, scheduler: DDPMScheduler, env, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__( + self, + value_function: UNet1DModel, + unet: UNet1DModel, + scheduler: DDPMScheduler, + env, + ): + super().__init__() self.value_function = value_function self.unet = unet self.scheduler = scheduler self.env = env self.data = env.get_dataset() - self.means = dict((key, val.mean(axis=0)) for key, val in self.data.items()) - self.stds = dict((key, val.std(axis=0)) for key, val in self.data.items()) - self.device = self.unet.device + self.means = dict() + for key in self.data.keys(): + try: + self.means[key] = self.data[key].mean() + except: + pass + self.stds = dict() + for key in self.data.keys(): + try: + self.stds[key] = self.data[key].std() + except: + pass self.state_dim = env.observation_space.shape[0] self.action_dim = env.action_space.shape[0] @@ -31,8 +46,8 @@ def to_torch(self, x_in): if type(x_in) is dict: return {k: self.to_torch(v) for k, v in x_in.items()} elif torch.is_tensor(x_in): - return x_in.to(self.device) - return torch.tensor(x_in, device=self.device) + return x_in.to(self.unet.device) + return torch.tensor(x_in, device=self.unet.device) def reset_x0(self, x_in, cond, act_dim): for key, val in cond.items(): @@ -44,7 +59,7 @@ def run_diffusion(self, x, conditions, n_guide_steps, scale): y = None for i in tqdm.tqdm(self.scheduler.timesteps): # create batch of timesteps to pass into model - 
timesteps = torch.full((batch_size,), i, device=self.device, dtype=torch.long) + timesteps = torch.full((batch_size,), i, device=self.unet.device, dtype=torch.long) # 3. call the sample function for _ in range(n_guide_steps): with torch.enable_grad(): @@ -65,7 +80,7 @@ def run_diffusion(self, x, conditions, n_guide_steps, scale): # 4. apply conditions to the trajectory x = self.reset_x0(x, conditions, self.action_dim) - x = self.to_torch(x, device=self.device) + x = self.to_torch(x, device=self.unet.device) # y = network(x, timesteps).sample return x, y @@ -74,7 +89,7 @@ def __call__(self, obs, batch_size=64, planning_horizon=20, n_guide_steps=2, sca obs = obs[None].repeat(batch_size, axis=0) conditions = {0: self.to_torch(obs)} shape = (batch_size, planning_horizon, self.state_dim + self.action_dim) - x1 = torch.randn(shape, device=self.device) + x1 = torch.randn(shape, device=self.unet.device) x = self.reset_x0(x1, conditions, self.action_dim) x = self.to_torch(x) x, y = self.run_diffusion(x, conditions, n_guide_steps, scale) diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index a114d9d2917a..8b8b708ee968 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -25,7 +25,7 @@ def _run(): env_name = "hopper-medium-v2" env = gym.make(env_name) - data = env.get_dataset() # dataset is only used for normalization in this colab + # data = env.get_dataset() # dataset is only used for normalization in this colab # render = train_diffuser.MuJoCoRenderer(env) # Cuda settings for colab @@ -56,6 +56,7 @@ def _run(): unet=unet, scheduler=scheduler, env=env, + custom_pipeline="/Users/bglickenhaus/Documents/diffusers/examples/community", ) # unet = UNet1DModel.from_pretrained("fusing/ddpm-unet-rl-hopper-hor128").to(device=DEVICE) # network = TemporalUNet.from_pretrained("fusing/ddpm-unet-rl-hopper-hor512").to(device=DEVICE) diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index edc97563c707..7088e560dd66 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -29,7 +29,7 @@ get_scheduler, ) from .pipeline_utils import DiffusionPipeline - from .pipelines import DDIMPipeline, DDPMPipeline, KarrasVePipeline, LDMPipeline, PNDMPipeline, ScoreSdeVePipeline, ValueGuidedDiffuserPipeline + from .pipelines import DDIMPipeline, DDPMPipeline, KarrasVePipeline, LDMPipeline, PNDMPipeline, ScoreSdeVePipeline from .schedulers import ( DDIMScheduler, DDPMScheduler, diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py index 8edec55188fa..1c31595fb0cf 100644 --- a/src/diffusers/pipelines/__init__.py +++ b/src/diffusers/pipelines/__init__.py @@ -8,7 +8,6 @@ from .pndm import PNDMPipeline from .score_sde_ve import ScoreSdeVePipeline from .stochastic_karras_ve import KarrasVePipeline - from .diffuser import ValueGuidedDiffuserPipeline else: from ..utils.dummy_pt_objects import * # noqa F403 diff --git a/src/diffusers/pipelines/diffuser/__init__.py b/src/diffusers/pipelines/diffuser/__init__.py deleted file mode 100644 index 23d3e16c48d3..000000000000 --- a/src/diffusers/pipelines/diffuser/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .pipeline_value_guided import ValueGuidedDiffuserPipeline From 0e4be7560befe50e6bf1c876081687bc382c9364 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Thu, 20 Oct 2022 20:17:43 -0400 Subject: [PATCH 30/32] fix couple shape bugs --- examples/community/pipeline.py | 6 +-- 
examples/community/value_guided_diffuser.py | 6 +-- .../diffuser/run_diffuser_value_guided.py | 42 +------------------ 3 files changed, 8 insertions(+), 46 deletions(-) diff --git a/examples/community/pipeline.py b/examples/community/pipeline.py index 5a28d6c222e6..0f0b505ce7b2 100644 --- a/examples/community/pipeline.py +++ b/examples/community/pipeline.py @@ -64,7 +64,7 @@ def run_diffusion(self, x, conditions, n_guide_steps, scale): for _ in range(n_guide_steps): with torch.enable_grad(): x.requires_grad_() - y = self.value_function(x, timesteps).sample + y = self.value_function(x.permute(0, 2, 1), timesteps).sample grad = torch.autograd.grad([y.sum()], [x])[0] posterior_variance = self.scheduler._get_variance(i) @@ -80,11 +80,11 @@ def run_diffusion(self, x, conditions, n_guide_steps, scale): # 4. apply conditions to the trajectory x = self.reset_x0(x, conditions, self.action_dim) - x = self.to_torch(x, device=self.unet.device) + x = self.to_torch(x) # y = network(x, timesteps).sample return x, y - def __call__(self, obs, batch_size=64, planning_horizon=20, n_guide_steps=2, scale=0.1): + def __call__(self, obs, batch_size=64, planning_horizon=32, n_guide_steps=2, scale=0.1): obs = self.normalize(obs, "observations") obs = obs[None].repeat(batch_size, axis=0) conditions = {0: self.to_torch(obs)} diff --git a/examples/community/value_guided_diffuser.py b/examples/community/value_guided_diffuser.py index 5a28d6c222e6..0f0b505ce7b2 100644 --- a/examples/community/value_guided_diffuser.py +++ b/examples/community/value_guided_diffuser.py @@ -64,7 +64,7 @@ def run_diffusion(self, x, conditions, n_guide_steps, scale): for _ in range(n_guide_steps): with torch.enable_grad(): x.requires_grad_() - y = self.value_function(x, timesteps).sample + y = self.value_function(x.permute(0, 2, 1), timesteps).sample grad = torch.autograd.grad([y.sum()], [x])[0] posterior_variance = self.scheduler._get_variance(i) @@ -80,11 +80,11 @@ def run_diffusion(self, x, conditions, n_guide_steps, scale): # 4. apply conditions to the trajectory x = self.reset_x0(x, conditions, self.action_dim) - x = self.to_torch(x, device=self.unet.device) + x = self.to_torch(x) # y = network(x, timesteps).sample return x, y - def __call__(self, obs, batch_size=64, planning_horizon=20, n_guide_steps=2, scale=0.1): + def __call__(self, obs, batch_size=64, planning_horizon=32, n_guide_steps=2, scale=0.1): obs = self.normalize(obs, "observations") obs = obs[None].repeat(batch_size, axis=0) conditions = {0: self.to_torch(obs)} diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 8b8b708ee968..aec2d826d1a2 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -6,7 +6,7 @@ import tqdm # import train_diffuser -from diffusers import DDPMScheduler, UNet1DModel, DiffusionPipeline +from diffusers import DDPMScheduler, UNet1DModel, DiffusionPipeline, UNet1DModel config = dict( @@ -75,43 +75,7 @@ def _run(): for t in tqdm.tqdm(range(T)): # 1. Call the policy # normalize observations for forward passes - obs = train_diffuser.normalize(obs, data, "observations") - - obs = obs[None].repeat(config["n_samples"], axis=0) - conditions = {0: train_diffuser.to_torch(obs, device=DEVICE)} - - # 2. 
Call the diffusion model - # constants for inference - batch_size = len(conditions[0]) - shape = (batch_size, config["horizon"], state_dim + action_dim) - - # sample random initial noise vector - x1 = torch.randn(shape, device=DEVICE) - - # this model is conditioned from an initial state, so you will see this function - # multiple times to change the initial state of generated data to the state - # generated via env.reset() above or env.step() below - x = train_diffuser.reset_x0(x1, conditions, action_dim) - - # convert a np observation to torch for model forward pass - x = train_diffuser.to_torch(x, device=DEVICE) - x, y = train_diffuser.run_diffusion(x, scheduler, network, unet, conditions, action_dim, config) - if y is not None: - sorted_idx = y.argsort(0, descending=True).squeeze() - y_maxes.append(y[sorted_idx[0]].detach().cpu().numpy()) - sorted_values = x[sorted_idx] - else: - sorted_values = x - actions = sorted_values[:, :, :action_dim] - if t % 10 == 0: - trajectory = sorted_values[:, :, action_dim:][0].unsqueeze(0).detach().cpu().numpy() - trajectory = train_diffuser.de_normalize(trajectory, data, "observations") - trajectories.append(trajectory) - - actions = actions.detach().cpu().numpy() - denorm_actions = train_diffuser.de_normalize(actions, data, key="actions") - # denorm_actions = denorm_actions[np.random.randint(config['n_samples']), 0] - denorm_actions = denorm_actions[0, 0] + denorm_actions = pipeline(obs, planning_horizon=32) # execute action in environment next_observation, reward, terminal, _ = env.step(denorm_actions) @@ -132,8 +96,6 @@ def _run(): print(f"Total reward: {total_reward}") - train_diffuser.show_sample(render, np.expand_dims(np.stack(rollout), axis=0)) - def run(): _run() From 5ef88ef56862816ab909eefc33eee59248c73f58 Mon Sep 17 00:00:00 2001 From: Ben Glickenhaus Date: Thu, 20 Oct 2022 20:19:35 -0400 Subject: [PATCH 31/32] style --- examples/community/pipeline.py | 5 ++--- examples/community/value_guided_diffuser.py | 5 ++--- examples/diffuser/run_diffuser_value_guided.py | 11 +---------- scripts/convert_models_diffuser_to_diffusers.py | 2 +- src/diffusers/models/unet_1d.py | 2 -- src/diffusers/models/unet_1d_blocks.py | 1 - 6 files changed, 6 insertions(+), 20 deletions(-) diff --git a/examples/community/pipeline.py b/examples/community/pipeline.py index 0f0b505ce7b2..5159de402b3a 100644 --- a/examples/community/pipeline.py +++ b/examples/community/pipeline.py @@ -1,7 +1,7 @@ import torch -from diffusers import DiffusionPipeline + import tqdm -from numpy import AxisError +from diffusers import DiffusionPipeline from diffusers.models.unet_1d import UNet1DModel from diffusers.utils.dummy_pt_objects import DDPMScheduler @@ -42,7 +42,6 @@ def de_normalize(self, x_in, key): return x_in * self.stds[key] + self.means[key] def to_torch(self, x_in): - if type(x_in) is dict: return {k: self.to_torch(v) for k, v in x_in.items()} elif torch.is_tensor(x_in): diff --git a/examples/community/value_guided_diffuser.py b/examples/community/value_guided_diffuser.py index 0f0b505ce7b2..5159de402b3a 100644 --- a/examples/community/value_guided_diffuser.py +++ b/examples/community/value_guided_diffuser.py @@ -1,7 +1,7 @@ import torch -from diffusers import DiffusionPipeline + import tqdm -from numpy import AxisError +from diffusers import DiffusionPipeline from diffusers.models.unet_1d import UNet1DModel from diffusers.utils.dummy_pt_objects import DDPMScheduler @@ -42,7 +42,6 @@ def de_normalize(self, x_in, key): return x_in * self.stds[key] + self.means[key] def 
to_torch(self, x_in): - if type(x_in) is dict: return {k: self.to_torch(v) for k, v in x_in.items()} elif torch.is_tensor(x_in): diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index aec2d826d1a2..11f36d1ada13 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -1,12 +1,9 @@ -import numpy as np -import torch - import d4rl # noqa import gym import tqdm # import train_diffuser -from diffusers import DDPMScheduler, UNet1DModel, DiffusionPipeline, UNet1DModel +from diffusers import DDPMScheduler, DiffusionPipeline, UNet1DModel config = dict( @@ -32,10 +29,6 @@ def _run(): # torch.cuda.get_device_name(0) DEVICE = config["device"] - # diffusion model settings - state_dim = env.observation_space.shape[0] - action_dim = env.action_space.shape[0] - # Two generators for different parts of the diffusion loop to work in colab scheduler = DDPMScheduler( num_train_timesteps=config["num_inference_steps"], @@ -69,8 +62,6 @@ def _run(): total_score = 0 T = 1000 rollout = [obs.copy()] - trajectories = [] - y_maxes = [0] try: for t in tqdm.tqdm(range(T)): # 1. Call the policy diff --git a/scripts/convert_models_diffuser_to_diffusers.py b/scripts/convert_models_diffuser_to_diffusers.py index 61e05d261396..b154295e9726 100644 --- a/scripts/convert_models_diffuser_to_diffusers.py +++ b/scripts/convert_models_diffuser_to_diffusers.py @@ -51,7 +51,7 @@ def value_function(): out_block_type="ValueFunction", block_out_channels=(32, 64, 128, 256), layers_per_block=1, - always_downsample=True + always_downsample=True, ) model = torch.load("/Users/bglickenhaus/Documents/diffuser/value_function-hopper-mediumv2-hor32.torch") diff --git a/src/diffusers/models/unet_1d.py b/src/diffusers/models/unet_1d.py index d0483609fead..b720c78b8833 100644 --- a/src/diffusers/models/unet_1d.py +++ b/src/diffusers/models/unet_1d.py @@ -17,14 +17,12 @@ import torch import torch.nn as nn -from diffusers.models.resnet import ResidualTemporalBlock1D from diffusers.models.unet_1d_blocks import get_down_block, get_mid_block, get_out_block, get_up_block from ..configuration_utils import ConfigMixin, register_to_config from ..modeling_utils import ModelMixin from ..utils import BaseOutput from .embeddings import TimestepEmbedding, Timesteps -from .resnet import rearrange_dims @dataclass diff --git a/src/diffusers/models/unet_1d_blocks.py b/src/diffusers/models/unet_1d_blocks.py index 1981a34754c3..a00372faf7d9 100644 --- a/src/diffusers/models/unet_1d_blocks.py +++ b/src/diffusers/models/unet_1d_blocks.py @@ -13,7 +13,6 @@ # limitations under the License. 
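With the unused imports gone, the value-guided example boils down to: build a DDPMScheduler, load the planner and value-function UNet1DModel checkpoints, assemble the community pipeline, and step the environment with the first planned action. A condensed sketch of that loop follows; the model ids are the ones loaded above, the scheduler settings mirror the example config, and custom_pipeline should point at a local checkout of examples/community:

# Condensed rollout sketch based on run_diffuser_value_guided.py above.
import d4rl  # noqa
import gym
import tqdm

from diffusers import DDPMScheduler, DiffusionPipeline, UNet1DModel

env = gym.make("hopper-medium-v2")
scheduler = DDPMScheduler(num_train_timesteps=100, beta_schedule="squaredcos_cap_v2")
value_function = UNet1DModel.from_pretrained("bglick13/hopper-medium-v2-value-function-hor32").eval()
unet = UNet1DModel.from_pretrained("bglick13/hopper-medium-v2-unet-hor32").eval()

pipeline = DiffusionPipeline.from_pretrained(
    "bglick13/hopper-medium-v2-value-function-hor32",
    value_function=value_function,
    unet=unet,
    scheduler=scheduler,
    env=env,
    custom_pipeline="examples/community",  # path to the directory holding pipeline.py
)

obs = env.reset()
total_reward = 0
for t in tqdm.tqdm(range(1000)):
    action = pipeline(obs, planning_horizon=32)  # plan a trajectory, take its first action
    obs, reward, terminal, _ = env.step(action)
    total_reward += reward
    if terminal:
        break
print(f"Total reward: {total_reward}")
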
-from turtle import forward import torch import torch.nn.functional as F from torch import nn From c6d94cef50b17f1f4626965204c97cee733a7221 Mon Sep 17 00:00:00 2001 From: Nathan Lambert Date: Thu, 20 Oct 2022 19:59:46 -0700 Subject: [PATCH 32/32] Apply suggestions from code review --- examples/community/pipeline.py | 3 --- examples/community/value_guided_diffuser.py | 3 --- examples/diffuser/run_diffuser.py | 3 --- examples/diffuser/run_diffuser_value_guided.py | 2 -- 4 files changed, 11 deletions(-) diff --git a/examples/community/pipeline.py b/examples/community/pipeline.py index 5159de402b3a..7e3f2b832b1f 100644 --- a/examples/community/pipeline.py +++ b/examples/community/pipeline.py @@ -73,14 +73,12 @@ def run_diffusion(self, x, conditions, n_guide_steps, scale): x = x.detach() x = x + scale * grad x = self.reset_x0(x, conditions, self.action_dim) - # with torch.no_grad(): prev_x = self.unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1) x = self.scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"] # 4. apply conditions to the trajectory x = self.reset_x0(x, conditions, self.action_dim) x = self.to_torch(x) - # y = network(x, timesteps).sample return x, y def __call__(self, obs, batch_size=64, planning_horizon=32, n_guide_steps=2, scale=0.1): @@ -97,6 +95,5 @@ def __call__(self, obs, batch_size=64, planning_horizon=32, n_guide_steps=2, sca actions = sorted_values[:, :, : self.action_dim] actions = actions.detach().cpu().numpy() denorm_actions = self.de_normalize(actions, key="actions") - # denorm_actions = denorm_actions[np.random.randint(config['n_samples']), 0] denorm_actions = denorm_actions[0, 0] return denorm_actions diff --git a/examples/community/value_guided_diffuser.py b/examples/community/value_guided_diffuser.py index 5159de402b3a..7e3f2b832b1f 100644 --- a/examples/community/value_guided_diffuser.py +++ b/examples/community/value_guided_diffuser.py @@ -73,14 +73,12 @@ def run_diffusion(self, x, conditions, n_guide_steps, scale): x = x.detach() x = x + scale * grad x = self.reset_x0(x, conditions, self.action_dim) - # with torch.no_grad(): prev_x = self.unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1) x = self.scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"] # 4. 
apply conditions to the trajectory x = self.reset_x0(x, conditions, self.action_dim) x = self.to_torch(x) - # y = network(x, timesteps).sample return x, y def __call__(self, obs, batch_size=64, planning_horizon=32, n_guide_steps=2, scale=0.1): @@ -97,6 +95,5 @@ def __call__(self, obs, batch_size=64, planning_horizon=32, n_guide_steps=2, sca actions = sorted_values[:, :, : self.action_dim] actions = actions.detach().cpu().numpy() denorm_actions = self.de_normalize(actions, key="actions") - # denorm_actions = denorm_actions[np.random.randint(config['n_samples']), 0] denorm_actions = denorm_actions[0, 0] return denorm_actions diff --git a/examples/diffuser/run_diffuser.py b/examples/diffuser/run_diffuser.py index ad35b59d99a7..b29d89992dfc 100644 --- a/examples/diffuser/run_diffuser.py +++ b/examples/diffuser/run_diffuser.py @@ -12,8 +12,6 @@ env = gym.make(env_name) data = env.get_dataset() # dataset is only used for normalization in this colab -# Cuda settings for colab -# torch.cuda.get_device_name(0) DEVICE = "cpu" DTYPE = torch.float @@ -41,7 +39,6 @@ clip_denoised = network.clip_denoised predict_epsilon = network.predict_epsilon -# add a batch dimension and repeat for multiple samples # [ observation_dim ] --> [ n_samples x observation_dim ] obs = env.reset() total_reward = 0 diff --git a/examples/diffuser/run_diffuser_value_guided.py b/examples/diffuser/run_diffuser_value_guided.py index 11f36d1ada13..4272ec2c3106 100644 --- a/examples/diffuser/run_diffuser_value_guided.py +++ b/examples/diffuser/run_diffuser_value_guided.py @@ -22,8 +22,6 @@ def _run(): env_name = "hopper-medium-v2" env = gym.make(env_name) - # data = env.get_dataset() # dataset is only used for normalization in this colab - # render = train_diffuser.MuJoCoRenderer(env) # Cuda settings for colab # torch.cuda.get_device_name(0)
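Pulling the pieces together: the guidance performed inside run_diffusion is essentially classifier guidance over trajectories. The value function's gradient with respect to the noisy plan is scaled by a factor derived from the scheduler's posterior variance and added to the sample before each denoising step, while the first timestep of every trajectory stays pinned to the current observation (trajectories are laid out as (batch, horizon, action_dim + state_dim), actions first). A self-contained sketch of one such step, summarizing the pipeline code above with the condition reset inlined; `unet`, `value_function` and `scheduler` are assumed to be the objects the pipeline holds:

# Sketch of a single value-guided denoising step (mirrors run_diffusion above).
import torch


def value_guided_step(x, i, unet, value_function, scheduler, conditions, action_dim,
                      n_guide_steps=2, scale=0.1):
    timesteps = torch.full((x.shape[0],), i, device=x.device, dtype=torch.long)

    for _ in range(n_guide_steps):
        with torch.enable_grad():
            x.requires_grad_()
            # predicted return of each (still noisy) trajectory
            y = value_function(x.permute(0, 2, 1), timesteps).sample
            grad = torch.autograd.grad([y.sum()], [x])[0]

        # weight the gradient by the scheduler's posterior noise level and
        # stop guiding on the last, almost noise-free steps
        grad = torch.exp(0.5 * scheduler._get_variance(i)) * grad
        grad[timesteps < 2] = 0

        x = x.detach() + scale * grad
        # pin the first state of every trajectory to the current observation
        for key, val in conditions.items():
            x[:, key, action_dim:] = val.clone()

    # ordinary reverse-diffusion step with the planner UNet
    prev_x = unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1)
    x = scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"]

    for key, val in conditions.items():
        x[:, key, action_dim:] = val.clone()
    return x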