Commit be89dda
Parent(s): 26b9f7f

upload DDPM inference script
Files changed:

- .gitattributes +1 -0
- .gitignore +1 -0
- README.md +88 -0
- docs/BT202504010900-ddpm.gif +3 -0
- inference.py +127 -0
- model_architect/UNet_DDPM.py +372 -0
- model_architect/layers.py +223 -0
- model_architect/utils.py +56 -0
- model_architect/weight_init.py +40 -0
- model_weights/ft06_01hr/weights.ckpt +3 -0
- model_weights/ft36_06hr/weights.ckpt +3 -0
- requirements.txt +3 -0
- sample_data/sample_202504131100.npz +3 -0
- sample_data/sample_202504161200.npz +3 -0
- sample_data/sample_202507151200.npz +3 -0
.gitattributes
CHANGED
@@ -4,6 +4,7 @@
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1 @@
+__pycache__/
README.md
ADDED
@@ -0,0 +1,88 @@
---
license: mit
tags:
- solar-radiation
- deep-learning
- nowcasting
- ddpm
- MCVD
---

# DDPM Solar Radiation model

A deep learning model for solar radiation nowcasting based on a modified [MCVD](https://arxiv.org/pdf/2205.09853) model, a DDPM-type model for video generation. The model predicts the clearsky index and converts it to solar radiation for up to 6 or 36 time steps ahead.

![DDPM sampling](docs/BT202504010900-ddpm.gif)

## Overview

This repository contains two trained models (1hr & 6hr) for solar radiation forecasting:
- **1hr DDPM Model**: Predicts solar radiation up to 1 hour ahead (6 time steps)
- **6hr DDPM Model**: Predicts solar radiation up to 6 hours ahead (36 time steps)

The model uses multiple input sources:
- **Himawari satellite data**: Clearsky index calculated from Himawari satellite imagery
- **WRF prediction**: Clearsky index derived from WRF's solar irradiation forecast
- **Topography**: Static topographical features

## Installation

1. Clone the repository & install Git LFS:
```bash
git lfs install
git clone <repository-url>
cd Diffusion_SolRad
git lfs pull
git lfs ls-files  # confirm that the model weights & sample data were downloaded
```

2. Install dependencies:
```bash
pip install -r requirements.txt
```

## Requirements

- Python 3.x
- PyTorch 2.4.0
- NumPy 1.26.4
- einops 0.8.0

## Usage

### Basic Inference

Run solar radiation prediction using the pre-trained models:

```bash
python inference.py --pred-hr [1hr/6hr] --pred-mode [DDPM/DDIM] --basetime 202504131100
```

### Command Line Arguments

- `--pred-mode`: Choose between `DDPM` and `DDIM` sampling methods (default: `DDPM`)
- `--pred-hr`: Choose between the `1hr` and `6hr` prediction models (default: `1hr`)
- `--basetime`: Timestamp of the input data in YYYYMMDDHHMM format (default: `202504131100`)

### Example

```bash
# DDIM sampling method for 1-hour prediction
python inference.py --pred-hr 1hr --pred-mode DDIM --basetime 202507151200
```

## Sample Data

The repository includes sample data files:
- `sample_data/sample_202504131100.npz`
- `sample_data/sample_202504161200.npz`
- `sample_data/sample_202507151200.npz`

## Model Weights

Pre-trained weights are available for both models:
- `model_weights/ft06_01hr/weights.ckpt` (1hr model)
- `model_weights/ft36_06hr/weights.ckpt` (6hr model)

## License

This project is released under the MIT License.
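For context, the conversion step mentioned above is the standard clearsky-index relation, exactly as implemented at the end of `inference.py` (with the predicted index clamped to [0, 1]):

```latex
% k_c is the model's predicted clearsky index, clamped to [0, 1]
\mathrm{SolarRad}(t) = k_c(t)\cdot \mathrm{ClearskyRad}(t), \qquad k_c(t)\in[0,1]
```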
docs/BT202504010900-ddpm.gif
ADDED
(binary GIF, tracked with Git LFS)
inference.py
ADDED
@@ -0,0 +1,127 @@
import time
import argparse
from dataclasses import dataclass
from pathlib import Path
from typing import List, Sequence
import sys
from datetime import datetime, timedelta

import numpy as np
import torch
import torch.nn.functional as F

from model_architect.UNet_DDPM import UNet_with_time, DDPM

@dataclass
class Config:
    input_frame: int = 12
    output_frame: int = 6
    cond_nc: int = 5
    time_emb_dim: int = 128
    base_chs: int = 32
    chs_mult: tuple = (1, 2, 4, 8, 8)       ## channel multiplier per resolution level
    use_attn_list: tuple = (0, 0, 1, 1, 1)  ## 0 means no attention, 1 means use attention
    n_res_blocks: int = 2
    n_steps: int = 1000
    dropout: float = 0.1

def data_loading(BASETIME, device):
    data_npz = np.load(f'./sample_data/sample_{BASETIME}.npz')

    inputs = {}
    for key in data_npz:
        inputs[key] = torch.from_numpy(data_npz[key]).to(device)

    return inputs

def arg_parse():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--pred-hr',
        type=str,
        default='1hr',
        choices=['1hr', '6hr']
    )
    parser.add_argument(
        '--pred-mode',
        type=str,
        default='DDPM',
        choices=['DDPM', 'DDIM']
    )
    parser.add_argument('--basetime', type=str, default='202504131100')
    args = parser.parse_args()
    return args

if __name__ == "__main__":
    config = Config()
    args = arg_parse()
    pred_hr = args.pred_hr
    pred_mode = args.pred_mode

    BASETIME = args.basetime
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    inputs = data_loading(BASETIME, device)
    model_config = Config()
    if pred_hr == '6hr':
        model_config.input_frame = 72
        model_config.output_frame = 36
    print("Prediction mode:", pred_mode)
    print("Prediction horizon:", pred_hr)

    ## preprocess inputs for the DDPM model
    ## concat previous Himawari frames and topo as conditional input (B, 5, 512, 512)
    ## WRF dim after interpolation: 1hr -> (B, 6, 512, 512), 6hr -> (B, 36, 512, 512)
    prev_himawari = inputs['Himawari'].squeeze(2)
    topo = inputs['topo']
    input_ = torch.cat([prev_himawari, topo], dim=1)
    WRF = F.interpolate(inputs['WRF'].squeeze(2), scale_factor=4, mode='bilinear')

    clearsky = inputs['clearsky']
    if pred_hr == '1hr':
        WRF = WRF[:, :6]
        clearsky = clearsky[:, :6]

    backbone = UNet_with_time(model_config)
    model = DDPM(backbone, output_shape=(model_config.output_frame, 512, 512))

    ## load model weights
    if pred_hr == '1hr':
        ckpt_path = './model_weights/ft06_01hr/weights.ckpt'
    elif pred_hr == '6hr':
        ckpt_path = './model_weights/ft36_06hr/weights.ckpt'

    ckpt = torch.load(ckpt_path, weights_only=True)
    model.load_state_dict(ckpt['state_dict'])
    model.eval()
    model = model.to(device)

    if pred_mode == 'DDPM':
        pred_clr_idx = model.sample_ddpm(
            input_,
            input_cond=WRF,
            verbose="text"
        )
    elif pred_mode == 'DDIM':
        pred_clr_idx = model.sample_ddim(
            input_,
            input_cond=WRF,
            ddim_steps=100,
            verbose="text"
        )

    ## map model output from [-1, 1] back to [0, 1]
    pred_clr_idx = (pred_clr_idx + 1.0) / 2.0
    pred_clr_idx = pred_clr_idx.clamp(0.0, 1.0)

    ## transform clearsky index to solar radiation
    pred_srad = pred_clr_idx * clearsky

    ## save prediction
    np.save(f'./pred_{BASETIME}_{pred_hr}_{pred_mode}.npy', pred_srad.cpu().numpy())
    print('Done')
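As a quick orientation aid, here is a minimal sketch for inspecting a sample input file and the saved output. The key names (`Himawari`, `WRF`, `topo`, `clearsky`) come from `inference.py`; the shapes noted in the comments are assumptions inferred from its preprocessing comments, not documented values.

```python
import numpy as np

# list the arrays inside one of the bundled sample files
data = np.load('./sample_data/sample_202504131100.npz')
for key in data:
    print(key, data[key].shape)  # expect keys: 'Himawari', 'WRF', 'topo', 'clearsky'

# after running inference.py, load the saved solar-radiation prediction
pred = np.load('./pred_202504131100_1hr_DDPM.npy')
print(pred.shape)  # presumably (B, 6, 512, 512) for the 1hr model
```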
model_architect/UNet_DDPM.py
ADDED
@@ -0,0 +1,372 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from .layers import ResidualBlock, AttnBlock
from .utils import get_named_beta_schedule

def sinusoidal_embedding(n, d):
    """
    n: number of diffusion steps,
    d: time embedding dimension
    """
    # Standard sinusoidal positional embedding: even rows get sin,
    # odd rows get cos of the same angles.
    embedding = torch.tensor([[i / 10000 ** (2 * j / d) for j in range(d)] for i in range(n)])
    sin_mask = torch.arange(0, n, 2)
    cos_mask = torch.arange(1, n, 2)

    embedding[cos_mask] = torch.cos(embedding[cos_mask])
    embedding[sin_mask] = torch.sin(embedding[sin_mask])

    return embedding

def _make_te(dim_in, dim_out):
    return nn.Sequential(
        nn.Linear(dim_in, dim_out),
        nn.SiLU(),
        nn.Linear(dim_out, dim_out)
    )

class UNet_with_time(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        input_frame = config.input_frame
        output_frame = config.output_frame
        n_steps = config.n_steps
        time_emb_dim = config.time_emb_dim
        cond_nc = config.cond_nc
        chs_mult = config.chs_mult  ## e.g. (1, 2, 4, 8)
        n_res_blocks = config.n_res_blocks
        base_chs = config.base_chs
        ## e.g. (0, 0, 1, 1) -> 0 means no attention
        use_attn_list = config.use_attn_list

        layer_depth = len(chs_mult)
        assert len(use_attn_list) == layer_depth, "length of use_attn_list should be the same as chs_mult"
        assert input_frame >= output_frame, "input_frame should be larger than or equal to output_frame"

        self.filter_list = [base_chs * m for m in chs_mult]

        ## time embedding
        self.time_embed = nn.Embedding(n_steps, time_emb_dim)
        self.time_embed.weight.data = sinusoidal_embedding(n_steps, time_emb_dim)
        self.time_embed.requires_grad_(False)
        self.time_embed_fc = _make_te(time_emb_dim, time_emb_dim)
        ## end of time embedding

        ## input conv
        self.input_layer = nn.PixelUnshuffle(downscale_factor=2)

        ## downsampling
        self.down_blocks = nn.ModuleList()
        in_c = input_frame * 4  ## after pixel unshuffle
        for i in range(layer_depth):
            out_c = self.filter_list[i]

            for _ in range(n_res_blocks):
                self.down_blocks.append(
                    ResidualBlock(in_c, in_c, cond_nc, time_emb_dim, down_flag=False, up_flag=False)
                )

            if use_attn_list[i]:
                self.down_blocks.append(AttnBlock(in_c, 4))  ## num_head=4

            self.down_blocks.append(
                ResidualBlock(in_c, out_c, cond_nc, time_emb_dim, down_flag=True, up_flag=False)
            )
            in_c = out_c
        ## end of downsampling

        ## middle
        self.mid_block1 = ResidualBlock(in_c, in_c, cond_nc, time_emb_dim, down_flag=False, up_flag=False)
        self.mid_attn = AttnBlock(in_c, 4)
        self.mid_block2 = ResidualBlock(in_c, in_c, cond_nc, time_emb_dim, down_flag=False, up_flag=False)
        ## end of middle

        ## upsampling
        self.up_blocks = nn.ModuleList()
        self.filter_list = [input_frame * 4] + self.filter_list[:-1]
        for i in reversed(range(layer_depth)):  ## i = layer_depth-1, ..., 0
            out_c = self.filter_list[i]

            self.up_blocks.append(
                ResidualBlock(in_c*2, out_c, cond_nc, time_emb_dim, down_flag=False, up_flag=True)
            )
            if use_attn_list[i]:
                self.up_blocks.append(AttnBlock(out_c))  ## num_head=1

            for _ in range(n_res_blocks):
                self.up_blocks.append(
                    ResidualBlock(out_c*2, out_c, cond_nc, time_emb_dim, down_flag=False, up_flag=False)
                )

            in_c = out_c

        ## end of upsampling
        self.out_up = nn.PixelShuffle(upscale_factor=2)
        self.out_conv = nn.Conv2d(input_frame, output_frame, 3, padding=1)

    def forward(self, x, t, cond):
        """
        x: (b, in_c, h, w), noisy input (concatenated with some data)
        t: (b,), time step
        cond: (b, cond_nc, h, w), conditional input
        """
        # time embedding
        t_emb = self.time_embed(t)         ## (b, time_emb_dim)
        t_emb = self.time_embed_fc(t_emb)  ## (b, time_emb_dim)

        # input conv
        x = self.input_layer(x)

        # downsampling
        skip_x = []
        for down_layer in self.down_blocks:
            if isinstance(down_layer, ResidualBlock):
                x = down_layer(x, cond, t_emb)
                skip_x.append(x)
            elif isinstance(down_layer, AttnBlock):
                x = down_layer(x)
            else:
                raise ValueError("Wrong layer type in down_blocks")

        # middle
        x = self.mid_block1(x, cond, t_emb)
        x = self.mid_attn(x)
        x = self.mid_block2(x, cond, t_emb)

        # upsampling
        for up_layer in self.up_blocks:
            if isinstance(up_layer, ResidualBlock):
                skip_feat = skip_x.pop()
                x = torch.cat([x, skip_feat], dim=1)  ## concat along channel dimension
                x = up_layer(x, cond, t_emb)
            elif isinstance(up_layer, AttnBlock):
                x = up_layer(x)
            else:
                raise ValueError("Wrong layer type in up_blocks")

        # output
        x = self.out_up(x)
        x = self.out_conv(x)

        return x

class DDPM(nn.Module):
    def __init__(self, backbone, output_shape, n_steps=1000, min_beta=1e-4, max_beta=0.02, device='cuda'):
        """
        output_shape: dim(C, H, W)
        """
        super().__init__()
        self.device = device
        self.backbone_model = backbone
        self.output_shape = output_shape

        self.n_steps = n_steps

        ## linear betas
        betas = get_named_beta_schedule("linear", n_steps, min_beta, max_beta)
        alphas = 1.0 - betas
        alpha_bars = torch.cumprod(alphas, dim=0)

        self.register_buffer('betas', betas)
        self.register_buffer('alphas', alphas)
        self.register_buffer('alpha_bars', alpha_bars)

    def forward(self, x, t, cond):
        """
        x: (b, in_c, h, w), noisy input (concatenated with some data)
        cond: (b, cond_nc, h, w), conditional input
        t: (b,), time step
        """
        return self.backbone_model(x, t, cond)

    @torch.no_grad()
    def add_noise(self, x0, t, eta=None):
        """
        x0: (b, c, h, w), original data
        t: (b,), time step (0 <= t < n_steps)
        """
        b, c, h, w = x0.shape
        if eta is None:
            eta = torch.randn(b, c, h, w, device=x0.device)

        alpha_bar = self.alpha_bars[t]
        noisy_x = alpha_bar.sqrt().reshape(b, 1, 1, 1) * x0 + (1 - alpha_bar).sqrt().reshape(b, 1, 1, 1) * eta

        return noisy_x

    def denoise(self, xt, t, cond):
        """
        xt: (b, in_c, h, w), noisy input (concatenated with some data)
        cond: (b, cond_nc, h, w), conditional input
        t: (b,), time step (0 <= t < n_steps)
        """
        pred_noise = self(xt, t, cond)
        return pred_noise

    @torch.no_grad()
    def _build_progress_iter(self, iterable, total, mode: str):
        """
        Internal helper to create a progress iterator based on verbose mode.
        """
        mode = (mode or "none").lower()
        if mode == "tqdm":
            try:
                from tqdm import tqdm

                return tqdm(iterable, total=total, desc="DDPM sampling", leave=False), mode
            except Exception:
                return iterable, "none"
        return iterable, mode

    @torch.no_grad()
    def sample_ddpm(self, cond, input_cond=None, verbose: str = "none", store_intermediate: bool = False):
        """
        cond: (b, cond_nc, h, w), conditional input (previous frames + topography)
        input_cond: optional frames concatenated with the noisy sample along the channel dim
        verbose: "none", "text", or "tqdm" for progress display
        """
        ## confirm that the model is in eval mode
        self.backbone_model.eval()

        B, C, H, W = cond.shape
        ## get cond device
        device = cond.device

        x = torch.randn(B, *self.output_shape, device=device)

        progress_iter_raw = reversed(range(self.n_steps))
        progress_iter, mode = self._build_progress_iter(progress_iter_raw, self.n_steps, verbose)
        use_text = mode == "text"

        text_interval = max(1, self.n_steps // 10)

        frames = []
        for idx, t in enumerate(progress_iter):
            time_tensor = (torch.ones(B, device=device) * t).long()
            if input_cond is not None:
                input_ = torch.cat((x, input_cond), dim=1)
            else:
                input_ = x

            eta_theta = self.denoise(input_, time_tensor, cond)

            alpha_t = self.alphas[t]
            alpha_t_bar = self.alpha_bars[t]

            a = 1 / alpha_t.sqrt()
            b = ((1 - alpha_t) / (1 - alpha_t_bar).sqrt()) * eta_theta

            x = a * (x - b)
            if t > 0:
                z = torch.randn(B, *self.output_shape, device=device)
                beta_t = self.betas[t]
                sigma_t = beta_t.sqrt()
                x = x + sigma_t * z

            ## store intermediate frames for visualization
            if (idx % 50 == 0) or (t == 0):
                out = x.clone()
                out = ((out + 1) / 2).clamp(0, 1)
                out = out.cpu().numpy()
                frames.append(out)

            if use_text and (idx + 1) % text_interval == 0:
                print(f"DDPM sampling {idx + 1}/{self.n_steps}", flush=True)

        if mode == "tqdm" and hasattr(progress_iter, "close"):
            progress_iter.close()

        if store_intermediate:
            return x, frames
        else:
            return x

    @torch.no_grad()
    def sample_ddim(self, cond, input_cond=None, ddim_steps: int = 100, eta: float = 0.2, verbose: str = "none", store_intermediate: bool = False):
        """
        Deterministic/stochastic DDIM sampling.

        cond: (b, cond_nc, h, w)
        input_cond: optional conditional input concatenated with the predicted frames
        ddim_steps: number of steps to sample (<= n_steps)
        eta: 0 for deterministic DDIM, >0 adds noise controlled by eta
        verbose: "none", "text", or "tqdm" for progress display
        """
        self.backbone_model.eval()

        B, C, H, W = cond.shape
        device = cond.device
        ddim_steps = max(1, min(ddim_steps, self.n_steps))

        # create evenly spaced timesteps
        ddim_timesteps = torch.linspace(0, self.n_steps - 1, steps=ddim_steps, device=device).long()
        ddim_timesteps = torch.unique(ddim_timesteps, sorted=True)  # safety against duplicates
        ddim_t_reverse = list(reversed(ddim_timesteps.tolist()))

        x = torch.randn(B, *self.output_shape, device=device)

        progress_iter_raw = enumerate(ddim_t_reverse)
        progress_iter, mode = self._build_progress_iter(progress_iter_raw, len(ddim_t_reverse), verbose)
        use_text = mode == "text"
        text_interval = max(1, len(ddim_t_reverse) // 10)

        frames = []
        for iter_idx, t in progress_iter:
            time_tensor = torch.full((B,), t, device=device, dtype=torch.long)
            if input_cond is not None:
                input_ = torch.cat((x, input_cond), dim=1)
            else:
                input_ = x

            eps = self.denoise(input_, time_tensor, cond)

            alpha_bar_t = self.alpha_bars[t]
            sqrt_alpha_bar_t = alpha_bar_t.sqrt()
            sqrt_one_minus_alpha_bar_t = (1 - alpha_bar_t).sqrt()

            x0_pred = (x - sqrt_one_minus_alpha_bar_t * eps) / sqrt_alpha_bar_t

            if iter_idx + 1 < len(ddim_t_reverse):
                t_prev = ddim_t_reverse[iter_idx + 1]
                alpha_bar_prev = self.alpha_bars[t_prev]
            else:
                alpha_bar_prev = torch.ones_like(alpha_bar_t)

            sigma_t = 0.0
            if eta > 0 and alpha_bar_prev < 1:
                sigma_t = eta * torch.sqrt(
                    (1 - alpha_bar_prev) / (1 - alpha_bar_t) * (1 - alpha_bar_t / alpha_bar_prev)
                )

            sigma_t = torch.as_tensor(sigma_t, device=device, dtype=x.dtype)
            noise = torch.randn_like(x) if (eta > 0 and alpha_bar_prev < 1) else torch.zeros_like(x)

            c_t = torch.sqrt(torch.clamp(1 - alpha_bar_prev - sigma_t ** 2, min=0.0))
            x = (
                alpha_bar_prev.sqrt() * x0_pred
                + c_t * eps
                + sigma_t * noise
            )

            ## store intermediate frames for visualization
            if (iter_idx % 25 == 0) or (t == 0):
                out = x.clone()
                out = ((out + 1) / 2).clamp(0, 1)
                out = out.cpu().numpy()
                frames.append(out)

            if use_text and (iter_idx + 1) % text_interval == 0:
                print(f"DDIM sampling {iter_idx + 1}/{len(ddim_t_reverse)}", flush=True)

        if mode == "tqdm" and hasattr(progress_iter, "close"):
            progress_iter.close()

        if store_intermediate:
            return x, frames
        else:
            return x

    # Backward-compatible alias
    sample = sample_ddpm
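For reference, the update rules these methods implement are the standard DDPM/DDIM equations; this is a restatement of the code above, not a new algorithm:

```latex
% add_noise: forward diffusion
x_t = \sqrt{\bar\alpha_t}\,x_0 + \sqrt{1-\bar\alpha_t}\,\epsilon, \qquad \epsilon \sim \mathcal{N}(0, I)

% sample_ddpm: reverse step (z ~ N(0, I) for t > 0, else z = 0)
x_{t-1} = \frac{1}{\sqrt{\alpha_t}}\Bigl(x_t - \frac{1-\alpha_t}{\sqrt{1-\bar\alpha_t}}\,\epsilon_\theta\Bigr) + \sqrt{\beta_t}\,z

% sample_ddim: jump from t to the previous kept timestep t'
\hat{x}_0 = \frac{x_t - \sqrt{1-\bar\alpha_t}\,\epsilon_\theta}{\sqrt{\bar\alpha_t}}, \qquad
x_{t'} = \sqrt{\bar\alpha_{t'}}\,\hat{x}_0 + \sqrt{1-\bar\alpha_{t'}-\sigma_t^2}\,\epsilon_\theta + \sigma_t z

% with \sigma_t = \eta\,\sqrt{\tfrac{1-\bar\alpha_{t'}}{1-\bar\alpha_t}\Bigl(1-\tfrac{\bar\alpha_t}{\bar\alpha_{t'}}\Bigr)}
```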
model_architect/layers.py
ADDED
@@ -0,0 +1,223 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

from .weight_init import default_init

class SPADE(nn.Module):
    def __init__(self, norm_nc, cond_nc, spade_dim=128, param_free_norm_type='group'):
        """
        SPADE (Spatially Adaptive Normalization) layer.
        norm_nc: number of channels of the normalized feature map
        cond_nc: number of channels of the conditional map
        """
        super().__init__()

        if param_free_norm_type == 'group':
            num_groups = min(norm_nc // 4, 32)
            while norm_nc % num_groups != 0:  # must find a divisor of norm_nc
                num_groups -= 1
            self.param_free_norm = nn.GroupNorm(num_groups=num_groups, num_channels=norm_nc, affine=False, eps=1e-6)
        elif param_free_norm_type == 'instance':
            self.param_free_norm = nn.InstanceNorm2d(norm_nc, affine=False)
        elif param_free_norm_type == 'batch':
            self.param_free_norm = nn.BatchNorm2d(norm_nc, affine=False)
        else:
            raise ValueError('%s is not a recognized param-free norm type in SPADE'
                             % param_free_norm_type)

        ks = 3
        pw = ks // 2
        self.mlp_shared = nn.Sequential(
            nn.Conv2d(cond_nc, spade_dim, kernel_size=ks, padding=pw),
            nn.ReLU()
        )
        self.mlp_gamma = nn.Conv2d(spade_dim, norm_nc, kernel_size=ks, padding=pw)
        self.mlp_beta = nn.Conv2d(spade_dim, norm_nc, kernel_size=ks, padding=pw)

    def forward(self, x, cond_map):
        ## Part 1. param-free normalization (GroupNorm / InstanceNorm / BatchNorm)
        normalized = self.param_free_norm(x)

        ## Part 2. produce scale and bias conditioned on the semantic map
        cond_map = F.interpolate(cond_map, size=x.size()[2:], mode='nearest')
        actv = self.mlp_shared(cond_map)
        gamma = self.mlp_gamma(actv)
        beta = self.mlp_beta(actv)

        ## apply scale and bias
        out = normalized * (1 + gamma) + beta

        return out

class ActNorm(nn.Module):
    def __init__(self, emb_dim, out_dim):
        super(ActNorm, self).__init__()

        ## For time embedding
        chs = 2 * out_dim
        self.fc = nn.Linear(emb_dim, chs)
        self.fc.weight.data = default_init()(self.fc.weight.shape)
        nn.init.zeros_(self.fc.bias)

        self.activation = nn.SiLU()

    def forward(self, x, t_emb):
        """
        x: dim(B, C, H, W) or dim(B, C*N, H, W) if 3D
        t_emb: dim(B, emb_dim)
        """
        # ada-norm as in https://github.com/openai/guided-diffusion
        emb = self.activation(t_emb)
        emb_out = self.fc(emb)[:, :, None, None]  # linear projection, broadcast over H and W
        scale, shift = torch.chunk(emb_out, 2, dim=1)

        y = x * (1 + scale) + shift

        return y

class Upsample_with_conv(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()

        self.up = nn.Upsample(scale_factor=2, mode="nearest")
        self.conv = nn.Conv2d(in_c, out_c, 3, padding=1)

    def forward(self, x):
        y = self.up(x)
        y = self.conv(y)

        return y

class Downsample_with_conv(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()
        self.conv = nn.Conv2d(in_c, out_c, 3, stride=2, padding=1)

    def forward(self, x):
        y = self.conv(x)

        return y


class ResidualBlock(nn.Module):
    def __init__(
        self,
        in_c,
        out_c,
        cond_nc,
        emb_dim,
        spade_dim=128,
        dropout=0.1,
        param_free_norm_type='group',
        up_flag=False,
        down_flag=False
    ):
        super().__init__()
        self.in_c = in_c
        self.out_c = out_c
        self.cond_nc = cond_nc
        self.emb_dim = emb_dim
        self.up_flag = up_flag
        self.down_flag = down_flag

        self.activation = nn.SiLU()

        ## first
        self.spade1 = SPADE(in_c, cond_nc, spade_dim, param_free_norm_type)
        self.act_norm1 = ActNorm(emb_dim, in_c)
        self.conv1 = nn.Conv2d(in_c, in_c, 3, padding=1)

        ## downsampling or upsampling
        if up_flag:
            self.up_or_down_layer = Upsample_with_conv(in_c, out_c)
            self.skip_layer = nn.Upsample(scale_factor=2, mode="nearest")
        elif down_flag:
            self.up_or_down_layer = Downsample_with_conv(in_c, out_c)
            self.skip_layer = nn.AvgPool2d(2)
        else:
            self.conv_no_change = nn.Conv2d(in_c, out_c, 3, padding=1)

        ## second
        self.spade2 = SPADE(out_c, cond_nc, spade_dim, param_free_norm_type)
        self.act_norm2 = ActNorm(emb_dim, out_c)
        self.conv2 = nn.Conv2d(out_c, out_c, 3, padding=1)

        self.dropout = nn.Dropout(dropout)
        ## skip connection
        if in_c != out_c:
            self.conv1x1 = nn.Conv2d(in_c, out_c, 1)

    def forward(self, x, cond, t_emb):
        """
        x: dim(B, C, H, W) or dim(B, C*N, H, W) if 3D
        cond: dim(B, cond_nc, H_cond, W_cond)
        t_emb: dim(B, emb_dim)
        """
        h = x
        ## first
        h = self.spade1(h, cond)
        h = self.act_norm1(h, t_emb)
        h = self.activation(h)
        h = self.conv1(h)

        ## up or down
        if self.up_flag or self.down_flag:
            x = self.skip_layer(x)
            h = self.up_or_down_layer(h)
        else:
            h = self.conv_no_change(h)

        ## second
        h = self.spade2(h, cond)
        h = self.act_norm2(h, t_emb)
        h = self.activation(h)
        h = self.dropout(h)
        h = self.conv2(h)

        ## skip connection
        if self.in_c != self.out_c:
            x = self.conv1x1(x)

        return x + h

class AttnBlock(nn.Module):
    def __init__(self, in_channel, n_head=1, norm_groups=32):
        super().__init__()

        self.n_head = n_head

        self.norm = nn.GroupNorm(norm_groups, in_channel)
        self.qkv = nn.Conv2d(in_channel, in_channel * 3, 1, bias=False)
        self.output_layer = nn.Conv2d(in_channel, in_channel, 1)

    def forward(self, x):
        batch, channel, height, width = x.shape
        n_head = self.n_head
        head_dim = channel // n_head

        norm = self.norm(x)
        qkv = self.qkv(norm).view(batch, n_head, head_dim * 3, -1)
        query, key, value = qkv.chunk(3, dim=2)  # b, n_head, head_dim, h*w

        attn = torch.einsum(
            "bndL, bndM -> bnLM", query, key
        ).contiguous() / math.sqrt(head_dim)
        attn = torch.softmax(attn, -1)
        out = torch.einsum("bnLM, bndM -> bndL", attn, value).contiguous()
        out = out.view(batch, channel, height, width)
        out = self.output_layer(out)

        return out + x

def CropNConcat(x1, x2):
    ## despite the name, this pads x1 up to x2's spatial size,
    ## then concatenates the two along the channel dimension
    row_diff = x2.shape[3] - x1.shape[3]
    col_diff = x2.shape[2] - x1.shape[2]

    x1 = F.pad(x1, [row_diff // 2, row_diff - row_diff // 2,
                    col_diff // 2, col_diff - col_diff // 2])

    out = torch.cat([x1, x2], dim=1)

    return out
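A small, self-contained shape check for these blocks may help. The dimensions below are illustrative choices based on the defaults in `inference.py`'s `Config` (12 input frames × 4 channels after `PixelUnshuffle`, 5 conditional channels, 128-dim time embedding); the spatial size is kept small so the attention map stays cheap. This is a sketch, not a test taken from the repo.

```python
import torch
from model_architect.layers import ResidualBlock, AttnBlock

block = ResidualBlock(48, 64, cond_nc=5, emb_dim=128, down_flag=True)
attn = AttnBlock(64, n_head=4)

x = torch.randn(1, 48, 64, 64)      # feature map after PixelUnshuffle
cond = torch.randn(1, 5, 512, 512)  # SPADE resizes the conditional map internally
t_emb = torch.randn(1, 128)         # projected time embedding

h = block(x, cond, t_emb)  # downsampled by 2 -> (1, 64, 32, 32)
h = attn(h)                # self-attention keeps the shape
print(h.shape)             # torch.Size([1, 64, 32, 32])
```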
model_architect/utils.py
ADDED
@@ -0,0 +1,56 @@
import torch
import math

# https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py
def get_named_beta_schedule(
    schedule_name,
    num_diffusion_timesteps,
    min_beta=1e-4,
    max_beta=0.02,
    s=0.008,
):
    """
    Get a pre-defined beta schedule for the given name.

    The beta schedule library consists of beta schedules which remain similar
    in the limit of num_diffusion_timesteps.
    Beta schedules may be added, but should not be removed or changed once
    they are committed to maintain backwards compatibility.
    """
    if schedule_name == "linear":
        # Linear schedule from Ho et al, extended to work for any number of
        # diffusion steps.
        # scale = 1000 / num_diffusion_timesteps
        scale = 1.0
        beta_start = scale * min_beta
        beta_end = scale * max_beta
        return torch.linspace(
            beta_start, beta_end, num_diffusion_timesteps,
        )

    elif schedule_name == "cosine":
        return betas_for_alpha_bar(
            num_diffusion_timesteps,
            lambda t: math.cos((t + s) / (1 + s) * math.pi / 2) ** 2,
        )
    else:
        raise NotImplementedError(f"unknown beta schedule: {schedule_name}")

def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999):
    """
    Create a beta schedule that discretizes the given alpha_t_bar function,
    which defines the cumulative product of (1-beta) over time from t = [0,1].

    :param num_diffusion_timesteps: the number of betas to produce.
    :param alpha_bar: a lambda that takes an argument t from 0 to 1 and
                      produces the cumulative product of (1-beta) up to that
                      part of the diffusion process.
    :param max_beta: the maximum beta to use; use values lower than 1 to
                     prevent singularities.
    """
    betas = []
    for i in range(num_diffusion_timesteps):
        t1 = i / num_diffusion_timesteps
        t2 = (i + 1) / num_diffusion_timesteps
        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
    return torch.tensor(betas, dtype=torch.float32)
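A quick sanity check of the linear schedule; the endpoint values follow directly from the defaults above, and the cumulative product matches what `DDPM.__init__` registers as `alpha_bars`:

```python
import torch
from model_architect.utils import get_named_beta_schedule

betas = get_named_beta_schedule("linear", 1000)  # 1e-4 ... 0.02, linearly spaced
alpha_bars = torch.cumprod(1.0 - betas, dim=0)
print(betas[0].item(), betas[-1].item())         # 0.0001 0.02
print(alpha_bars[-1].item())                     # ~4e-5: x_T is essentially pure noise
```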
model_architect/weight_init.py
ADDED
@@ -0,0 +1,40 @@
import torch
import numpy as np

def variance_scaling(scale, mode, distribution,
                     in_axis=1, out_axis=0,
                     dtype=torch.float32,
                     device='cpu'):
    """Ported from JAX."""

    def _compute_fans(shape, in_axis=1, out_axis=0):
        receptive_field_size = np.prod(shape) / shape[in_axis] / shape[out_axis]
        fan_in = shape[in_axis] * receptive_field_size
        fan_out = shape[out_axis] * receptive_field_size
        return fan_in, fan_out

    def init(shape, dtype=dtype, device=device):
        fan_in, fan_out = _compute_fans(shape, in_axis, out_axis)
        if mode == "fan_in":
            denominator = fan_in
        elif mode == "fan_out":
            denominator = fan_out
        elif mode == "fan_avg":
            denominator = (fan_in + fan_out) / 2
        else:
            raise ValueError(
                "invalid mode for variance scaling initializer: {}".format(mode))
        variance = scale / denominator
        if distribution == "normal":
            return torch.randn(*shape, dtype=dtype, device=device) * np.sqrt(variance)
        elif distribution == "uniform":
            return (torch.rand(*shape, dtype=dtype, device=device) * 2. - 1.) * np.sqrt(3 * variance)
        else:
            raise ValueError("invalid distribution for variance scaling initializer")

    return init

def default_init(scale=1.):
    """The same initialization used in DDPM."""
    scale = 1e-10 if scale == 0 else scale
    return variance_scaling(scale, 'fan_avg', 'uniform')
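Usage mirrors how `layers.py` applies this initializer to the time-embedding projection in `ActNorm`:

```python
import torch.nn as nn
from model_architect.weight_init import default_init

fc = nn.Linear(128, 64)
fc.weight.data = default_init()(fc.weight.shape)  # fan_avg / uniform, scale 1.0
nn.init.zeros_(fc.bias)
```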
model_weights/ft06_01hr/weights.ckpt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0d1664224001817cb35f970ac9ff06d7e6ea66b0152385e849c6d7d1bf6bd01f
size 231196876
model_weights/ft36_06hr/weights.ckpt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4c7e10ccc11f79598d99bd77ed52a6c02c4ec0bbd567e0f423bafa9ff887622f
size 326923212
requirements.txt
ADDED
@@ -0,0 +1,3 @@
numpy==1.26.4
torch==2.4.0
einops==0.8.0
sample_data/sample_202504131100.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6117356ac780a530645e192cc85d647b103c915b63433441d810dedc7cdd4ec1
size 33002900
sample_data/sample_202504161200.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d7b9e6c7ed76f695f7c6f2f1a976f4c27128120e5c4328223809c27dc8feee52
size 33300209
sample_data/sample_202507151200.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e20eb69ea2bef6bb0074c376afa7e8398e7da8eb1edd9a1f11c343ffe711a299
size 33038261