Spaces:

rahul7star
/

Image2Video

Paused

App Files Files Community

rahul7star committed 5 days ago

Commit

7b09632

verified ·

1 Parent(s): 11a45c8

Update app_quant_latent.py

Browse files

Files changed (1) hide show

app_quant_latent.py +57 -45

app_quant_latent.py CHANGED Viewed

@@ -579,7 +579,6 @@ def upload_latents_to_hf(latent_dict, filename="latents.pt"):
         os.remove(local_path)
         raise e
 @spaces.GPU
 def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
     LOGS = []
@@ -590,46 +589,36 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
     latent_gallery = []
     final_gallery = []
-    # --- Try generating latent previews ---
     try:
         latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
-        # Decode latent tensor to PIL for preview with robust fallbacks
-        latent_img = placeholder
-        try:
-            with torch.no_grad():
-                # 1️⃣ Try normal VAE decode if available
-                if hasattr(pipe, "vae") and hasattr(pipe.vae, "decode"):
-                    try:
-                        latent_img_tensor = pipe.vae.decode(latents).sample  # [1,3,H,W]
-                        latent_img_tensor = (latent_img_tensor / 2 + 0.5).clamp(0, 1)
-                        latent_img_tensor = latent_img_tensor.cpu().permute(0, 2, 3, 1)[0]
-                        latent_img = Image.fromarray((latent_img_tensor.numpy() * 255).astype('uint8'))
-                    except Exception as e1:
-                        LOGS.append(f"⚠️ VAE decode failed: {e1}")
-                # 2️⃣ Collapse first 3 channels if decode failed
-                if latent_img is placeholder and latents.shape[1] >= 3:
-                    ch = latents[0, :3, :, :]
-                    ch = (ch - ch.min()) / (ch.max() - ch.min() + 1e-8)
-                    latent_img = Image.fromarray((ch.permute(1, 2, 0).cpu().numpy() * 255).astype('uint8'))
-                # 3️⃣ Collapse all channels to mean -> replicate to RGB
-                if latent_img is placeholder:
-                    mean_ch = latents[0].mean(dim=0, keepdim=True)  # [1,H,W]
-                    mean_ch = (mean_ch - mean_ch.min()) / (mean_ch.max() - mean_ch.min() + 1e-8)
-                    latent_img = Image.fromarray(
-                        torch.cat([mean_ch]*3, dim=0).permute(1,2,0).cpu().numpy().astype('uint8')
-                    )
-        except Exception as e:
-            LOGS.append(f"⚠️ Latent to image conversion failed: {e}")
-            latent_img = placeholder
-        latent_gallery.append(latent_img)
-        yield None, latent_gallery, LOGS  # show preview immediately
-        # Save latents to HF for later testing
         latent_dict = {"latents": latents.cpu(), "prompt": prompt, "seed": seed}
         try:
             hf_url = upload_latents_to_hf(latent_dict, filename=f"latents_{seed}.pt")
@@ -642,7 +631,7 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
         latent_gallery.append(placeholder)
         yield None, latent_gallery, LOGS
-    # --- Final image: completely untouched, uses standard pipeline ---
     try:
         output = pipe(
             prompt=prompt,
@@ -663,9 +652,9 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
         final_gallery.append(placeholder)
         latent_gallery.append(placeholder)
         yield placeholder, latent_gallery, LOGS
-# this version generate well for final and gives a tensor back for latent
 @spaces.GPU
-def generate_image_workswell(prompt, height, width, steps, seed, guidance_scale=0.0):
     LOGS = []
     device = "cuda"
     generator = torch.Generator(device).manual_seed(int(seed))
@@ -678,14 +667,36 @@ def generate_image_workswell(prompt, height, width, steps, seed, guidance_scale=
     try:
         latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
-        # Decode latent tensor to PIL for preview
         try:
             with torch.no_grad():
-                latent_img_tensor = pipe.vae.decode(latents).sample  # [1, 3, H, W]
-                latent_img_tensor = (latent_img_tensor / 2 + 0.5).clamp(0, 1)
-                latent_img_tensor = latent_img_tensor.cpu().permute(0, 2, 3, 1)[0]  # HWC
-                latent_img = Image.fromarray((latent_img_tensor.numpy() * 255).astype('uint8'))
-        except Exception:
             latent_img = placeholder
         latent_gallery.append(latent_img)
@@ -725,6 +736,7 @@ def generate_image_workswell(prompt, height, width, steps, seed, guidance_scale=
         final_gallery.append(placeholder)
         latent_gallery.append(placeholder)
         yield placeholder, latent_gallery, LOGS

         os.remove(local_path)
         raise e
 @spaces.GPU
 def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
     LOGS = []
     latent_gallery = []
     final_gallery = []
+    # --- Generate latent previews in a loop ---
     try:
         latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
+        # Convert latents to float32 if necessary
+        if latents.dtype != torch.float32:
+            latents = latents.float()
+        # Loop for multiple previews before final image
+        num_previews = min(10, steps)  # show ~10 previews
+        preview_steps = torch.linspace(0, 1, num_previews)
+        for i, alpha in enumerate(preview_steps):
+            try:
+                with torch.no_grad():
+                    # Simple noise interpolation for preview (simulate denoising progress)
+                    preview_latent = latents * alpha + torch.randn_like(latents) * (1 - alpha)
+                    # Decode to PIL
+                    latent_img_tensor = pipe.vae.decode(preview_latent).sample  # [1,3,H,W]
+                    latent_img_tensor = (latent_img_tensor / 2 + 0.5).clamp(0, 1)
+                    latent_img_tensor = latent_img_tensor.cpu().permute(0, 2, 3, 1)[0]
+                    latent_img = Image.fromarray((latent_img_tensor.numpy() * 255).astype('uint8'))
+            except Exception as e:
+                LOGS.append(f"⚠️ Latent preview decode failed: {e}")
+                latent_img = placeholder
+            latent_gallery.append(latent_img)
+            yield None, latent_gallery, LOGS  # update Gradio with intermediate preview
+        # Save final latents to HF
         latent_dict = {"latents": latents.cpu(), "prompt": prompt, "seed": seed}
         try:
             hf_url = upload_latents_to_hf(latent_dict, filename=f"latents_{seed}.pt")
         latent_gallery.append(placeholder)
         yield None, latent_gallery, LOGS
+    # --- Final image: untouched standard pipeline ---
     try:
         output = pipe(
             prompt=prompt,
         final_gallery.append(placeholder)
         latent_gallery.append(placeholder)
         yield placeholder, latent_gallery, LOGS
+# This is a stable version that can generate the final image and a noise-to-latent preview
 @spaces.GPU
+def generate_image0(prompt, height, width, steps, seed, guidance_scale=0.0):
     LOGS = []
     device = "cuda"
     generator = torch.Generator(device).manual_seed(int(seed))
     try:
         latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
+        # Decode latent tensor to PIL for preview with robust fallbacks
+        latent_img = placeholder
         try:
             with torch.no_grad():
+                # 1️⃣ Try normal VAE decode if available
+                if hasattr(pipe, "vae") and hasattr(pipe.vae, "decode"):
+                    try:
+                        latent_img_tensor = pipe.vae.decode(latents).sample  # [1,3,H,W]
+                        latent_img_tensor = (latent_img_tensor / 2 + 0.5).clamp(0, 1)
+                        latent_img_tensor = latent_img_tensor.cpu().permute(0, 2, 3, 1)[0]
+                        latent_img = Image.fromarray((latent_img_tensor.numpy() * 255).astype('uint8'))
+                    except Exception as e1:
+                        LOGS.append(f"⚠️ VAE decode failed: {e1}")
+                # 2️⃣ Collapse first 3 channels if decode failed
+                if latent_img is placeholder and latents.shape[1] >= 3:
+                    ch = latents[0, :3, :, :]
+                    ch = (ch - ch.min()) / (ch.max() - ch.min() + 1e-8)
+                    latent_img = Image.fromarray((ch.permute(1, 2, 0).cpu().numpy() * 255).astype('uint8'))
+                # 3️⃣ Collapse all channels to mean -> replicate to RGB
+                if latent_img is placeholder:
+                    mean_ch = latents[0].mean(dim=0, keepdim=True)  # [1,H,W]
+                    mean_ch = (mean_ch - mean_ch.min()) / (mean_ch.max() - mean_ch.min() + 1e-8)
+                    latent_img = Image.fromarray(
+                        torch.cat([mean_ch]*3, dim=0).permute(1,2,0).cpu().numpy().astype('uint8')
+                    )
+        except Exception as e:
+            LOGS.append(f"⚠️ Latent to image conversion failed: {e}")
             latent_img = placeholder
         latent_gallery.append(latent_img)
         final_gallery.append(placeholder)
         latent_gallery.append(placeholder)
         yield placeholder, latent_gallery, LOGS
+# This version generates the final image well and returns a tensor for the latent