#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2025 Apple Inc. All Rights Reserved.
#
## camera
from pathlib import Path
import json
import re
import tarfile
from einops import rearrange
import torch
import numpy as np
from PIL import Image
import torchvision.transforms.functional as F
from torchvision import transforms
import math
def find_factors(n):
    """Return all factors of n, largest first."""
    factors = set()
    for i in range(1, int(math.sqrt(n)) + 1):
        if n % i == 0:
            factors.add(i)
            factors.add(n // i)
    return sorted(factors, reverse=True)
def find_max_scale_factor(A, B):
    """Return the largest common factor of A and B that keeps both
    downscaled sides at least 32 px and their difference even."""
    gcd = math.gcd(A, B)
    factors = find_factors(gcd)
    for factor in factors:
        if A // factor >= 32 and B // factor >= 32 and abs(A - B) // factor % 2 == 0:
            return factor
    return 1
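# Quick sanity check (illustrative only, not exercised by the pipeline): for
# the default 544x960 resolution used below, gcd(544, 960) = 32, but
# 544 // 32 = 17 < 32, so the next factor down is returned:
#
#   >>> find_max_scale_factor(544, 960)
#   16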
def _get_plucker_embedding(intrinsic_parameters, w2c_matrices, height, width, norm_t=False, mask_idx=[0], project=False):
    # Concatenate one Plucker embedding per conditioning frame along the channel axis.
    return np.concatenate([
        get_plucker_embedding(intrinsic_parameters, w2c_matrices, height, width, norm_t, idx, project)
        for idx in mask_idx], -1)
def get_plucker_embedding(intrinsic_parameters, w2c_matrices, height, width, norm_t=False, mask_idx=0, project=True):
    """
    intrinsic_parameters.shape = [f, 4]  # normalized (fx, fy, cx, cy)
    w2c_matrices.shape = [f, 4, 4]
    """
    num_frames = intrinsic_parameters.shape[0]
    c2w_matrices = np.linalg.inv(w2c_matrices)
    if project:
        w2c_cond_matrices = w2c_matrices[mask_idx: mask_idx + 1]
        c2w_matrices = w2c_cond_matrices @ c2w_matrices  # poses relative to the conditioning frame mask_idx
    if norm_t:
        offset = c2w_matrices[:, :3, -1:]  # [f, 3, 1] translations
        offset = offset / (np.abs(offset).max(axis=(1, 2), keepdims=True) + 1e-7)
        c2w_matrices[:, :3, -1:] = offset
    ys, xs = np.meshgrid(
        np.linspace(0, height - 1, height, dtype=c2w_matrices.dtype),
        np.linspace(0, width - 1, width, dtype=c2w_matrices.dtype), indexing='ij')
    # Shift to pixel centers.
    ys = np.tile(ys.reshape([1, height * width]), [num_frames, 1]) + 0.5
    xs = np.tile(xs.reshape([1, height * width]), [num_frames, 1]) + 0.5
    fx, fy, cx, cy = np.split(intrinsic_parameters, 4, -1)
    fx, fy, cx, cy = fx * width, fy * height, cx * width, cy * height
    # Unproject pixels to unit-depth camera-space rays.
    zs_cam = np.ones_like(xs)
    xs_cam = (xs - cx) / fx * zs_cam
    ys_cam = (ys - cy) / fy * zs_cam
    directions = np.stack((xs_cam, ys_cam, zs_cam), -1)
    directions = directions / np.linalg.norm(directions, axis=-1, keepdims=True)
    # Rotate ray directions into world space; origins are the camera centers.
    ray_directions_w = (c2w_matrices[..., :3, :3] @ directions.transpose(0, 2, 1)).transpose(0, 2, 1)
    ray_origin_w = np.expand_dims(c2w_matrices[..., :3, 3], axis=-2)
    ray_origin_w = np.broadcast_to(ray_origin_w, ray_directions_w.shape)
    # Plucker coordinates: (o x d, d).
    ray_dxo = np.cross(ray_origin_w, ray_directions_w)
    plucker_embedding = np.concatenate([ray_dxo, ray_directions_w], -1).reshape(num_frames, height, width, 6)
    return plucker_embedding
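# Minimal shape check (a sketch with identity poses, not real data): two frames
# with normalized intrinsics (fx, fy, cx, cy) yield one 6-channel Plucker map
# per frame.
#
#   >>> K = np.array([[1.0, 1.0, 0.5, 0.5]] * 2)    # [f, 4]
#   >>> E = np.tile(np.eye(4)[None], (2, 1, 1))     # [f, 4, 4] world-to-camera
#   >>> get_plucker_embedding(K, E, 34, 60).shape
#   (2, 34, 60, 6)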
def label_to_camera(label):
    num_frames = label.shape[0]
    # label layout: [w, h, fl_x, fl_y] followed by the flattened 4x4 transform_matrix
    w, h, fx, fy = label[:, 0:1], label[:, 1:2], label[:, 2:3], label[:, 3:4]
    fx, fy = fx / w, fy / h  # normalize focal lengths by image size
    c2w = label[:, 4:].reshape(num_frames, 4, 4)
    # Flip and permute axes to convert the pose convention to OpenCV-style camera-to-world.
    c2w[:, 2, :] *= -1
    c2w = c2w[:, np.array([1, 0, 2, 3]), :]
    c2w[:, 0:3, 1:3] *= -1
    w2c = np.linalg.inv(c2w)
    # Normalized intrinsics (fx, fy, cx, cy); the principal point is assumed at the image center.
    intrinsic = np.concatenate([fx, fy, np.ones_like(fx) * .5, np.ones_like(fx) * .5], 1)
    return intrinsic, w2c
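# Illustrative shape check (made-up values): each row of `label` is
# [w, h, fl_x, fl_y] plus the 16 flattened pose entries.
#
#   >>> label = np.concatenate([[960, 544, 800, 800], np.eye(4).ravel()])[None]  # [1, 20]
#   >>> intrinsic, w2c = label_to_camera(label)
#   >>> intrinsic.shape, w2c.shape
#   ((1, 4), (1, 4, 4))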
def get_camera_condition(tar, camera_file, width=960, height=544, factor=16, frame_inds=None):
    # factor = find_max_scale_factor(height, width)
    H, W = height // factor, width // factor
    try:
        with tar.extractfile(camera_file) as cam_data:
            camera_data = json.load(cam_data)
        prefix = [camera_data['w'], camera_data['h'], camera_data['fl_x'], camera_data['fl_y']]
        labels = []
        if frame_inds is None:
            frame_inds = list(range(len(camera_data['frames'])))
        for ind in frame_inds:
            frame_info = camera_data['frames'][ind]
            label = prefix + sum(frame_info['transform_matrix'], [])
            labels.append(label)
        label = np.array(labels)
        intrinsic, w2c = label_to_camera(label)
        ray_map = _get_plucker_embedding(intrinsic, w2c, H, W, norm_t=False, mask_idx=[0], project=True)
        ray_map = torch.from_numpy(ray_map)  # .permute(0, 3, 1, 2)  # [f, h, w, c]
        # ray_map = F.resize(transforms.CenterCrop(min(H, W))(ray_map), 32).permute(0, 2, 3, 1)
    except Exception as e:
        print(f'Reading data error {e} {camera_file}')
        num = len(frame_inds) if frame_inds is not None else 0
        ray_map = torch.zeros((num, H, W, 6))
    return ray_map
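# Assumed usage (the archive and member names are illustrative): `camera_file`
# points at a NeRF-style JSON inside the tar, with `w`, `h`, `fl_x`, `fl_y`
# and a list of `frames`, each carrying a 4x4 `transform_matrix`.
#
#   >>> with tarfile.open('scene.tar') as tar:
#   ...     ray_map = get_camera_condition(tar, 'transforms.json')
#   >>> ray_map.shape   # (num_frames, 544 // 16, 960 // 16, 6)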
## force
def get_wind_condition(force, angle, min_force, max_force, num_frames=45, num_channels=3, height=480, width=720):
    condition = torch.zeros((num_frames, num_channels, height, width))
    # first channel gets the wind speed, normalized to [-1, 1]
    condition[:, 0] = -1 + 2 * (force - min_force) / (max_force - min_force)
    # second channel gets cos(wind_angle)
    condition[:, 1] = math.cos(angle * torch.pi / 180.0)
    # third channel gets sin(wind_angle)
    condition[:, 2] = math.sin(angle * torch.pi / 180.0)
    return rearrange(condition, 'f c h w -> f h w c')
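# Example with illustrative values: a wind of strength 10 out of [0, 20]
# blowing at 90 degrees gives, at every pixel, roughly (0.0, 0.0, 1.0) for
# (normalized speed, cos, sin).
#
#   >>> cond = get_wind_condition(force=10.0, angle=90.0, min_force=0.0, max_force=20.0)
#   >>> cond.shape
#   torch.Size([45, 480, 720, 3])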
def get_gaussian_blob(x, y, radius=10, amplitude=1.0, shape=(3, 480, 720), device=None):
    """
    Create a tensor containing a Gaussian blob at the specified location.
    Args:
        x (float): x-coordinate of the blob center
        y (float): y-coordinate of the blob center
        radius (int, optional): Radius (standard deviation) of the Gaussian blob. Defaults to 10.
        amplitude (float, optional): Maximum intensity of the blob. Defaults to 1.0.
        shape (tuple, optional): Shape of the output tensor (channels, height, width). Defaults to (3, 480, 720).
        device (torch.device, optional): Device to create the tensor on. Defaults to None.
    Returns:
        torch.Tensor: Tensor of shape (channels, height, width) containing the Gaussian blob
    """
    num_channels, height, width = shape
    # Create a new tensor filled with zeros
    blob_tensor = torch.zeros(shape, device=device)
    # Create coordinate grids
    y_grid, x_grid = torch.meshgrid(
        torch.arange(height, device=device),
        torch.arange(width, device=device),
        indexing='ij'
    )
    # Calculate squared distance from (x, y)
    squared_dist = (x_grid - x) ** 2 + (y_grid - y) ** 2
    # Create Gaussian blob using the squared distance
    gaussian = amplitude * torch.exp(-squared_dist / (2.0 * radius ** 2))
    # Add the Gaussian blob to all channels
    for c in range(num_channels):
        blob_tensor[c] = gaussian
    return blob_tensor
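# Illustrative check: the blob peaks at its center with the requested amplitude.
#
#   >>> blob = get_gaussian_blob(x=360, y=240)
#   >>> blob.shape, blob[0, 240, 360].item()
#   (torch.Size([3, 480, 720]), 1.0)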
def get_point_condition(force, angle, x_pos, y_pos, min_force, max_force, num_frames=45, num_channels=3, height=480, width=720):
    condition = torch.zeros((num_frames, num_channels, height, width))  # (45, 3, 480, 720)
    # Convert the normalized click position to pixel coordinates (y is flipped).
    x_pos_start = x_pos * width
    y_pos_start = (1 - y_pos) * height
    DISPLACEMENT_FOR_MAX_FORCE = width / 2
    DISPLACEMENT_FOR_MIN_FORCE = width / 8
    force_percent = (force - min_force) / (max_force - min_force)
    total_displacement = DISPLACEMENT_FOR_MIN_FORCE + (DISPLACEMENT_FOR_MAX_FORCE - DISPLACEMENT_FOR_MIN_FORCE) * force_percent
    x_pos_end = x_pos_start + total_displacement * math.cos(angle * torch.pi / 180.0)
    y_pos_end = y_pos_start - total_displacement * math.sin(angle * torch.pi / 180.0)
    for frame in range(num_frames):
        t = frame / (num_frames - 1)
        x_pos_ = x_pos_start * (1 - t) + x_pos_end * t  # t = 0 --> start; t = 1 --> end
        y_pos_ = y_pos_start * (1 - t) + y_pos_end * t  # t = 0 --> start; t = 1 --> end
        blob_tensor = get_gaussian_blob(x=x_pos_, y=y_pos_, radius=20, amplitude=1.0, shape=(num_channels, height, width))
        condition[frame] += blob_tensor
    return rearrange(condition, 'f c h w -> f h w c')
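# Sketch of the point-force condition (assumed ranges; x_pos and y_pos are
# normalized click coordinates in [0, 1]): a mid-strength push from the image
# center toward the right edge.
#
#   >>> cond = get_point_condition(force=5.0, angle=0.0, x_pos=0.5, y_pos=0.5,
#   ...                            min_force=0.0, max_force=10.0)
#   >>> cond.shape
#   torch.Size([45, 480, 720, 3])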