rahul7star committed
Commit e7274da · verified · 1 Parent(s): 7b09632

Update app_quant_latent.py

Files changed (1)
  1. app_quant_latent.py +55 -222
app_quant_latent.py CHANGED
@@ -579,6 +579,8 @@ def upload_latents_to_hf(latent_dict, filename="latents.pt"):
         os.remove(local_path)
         raise e
 
+
+
 @spaces.GPU
 def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
     LOGS = []
@@ -589,49 +591,52 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
     latent_gallery = []
     final_gallery = []
 
-    # --- Generate latent previews in a loop ---
+    all_latents = []  # store all preview latents
+
+    # --- Try generating latent previews ---
     try:
         latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
 
-        # Convert latents to float32 if necessary
-        if latents.dtype != torch.float32:
-            latents = latents.float()
-
-        # Loop for multiple previews before final image
-        num_previews = min(10, steps)  # show ~10 previews
-        preview_steps = torch.linspace(0, 1, num_previews)
-
-        for i, alpha in enumerate(preview_steps):
+        # Loop through timesteps for preview generation
+        for i, t in enumerate(pipe.scheduler.timesteps):
             try:
+                # Convert latent tensor to PIL for preview
                 with torch.no_grad():
-                    # Simple noise interpolation for preview (simulate denoising progress)
-                    preview_latent = latents * alpha + torch.randn_like(latents) * (1 - alpha)
-                    # Decode to PIL
-                    latent_img_tensor = pipe.vae.decode(preview_latent).sample  # [1,3,H,W]
+                    # Some pipelines may require the same dtype as the VAE
+                    latent_to_decode = latents.to(pipe.vae.dtype)
+                    latent_img_tensor = pipe.vae.decode(latent_to_decode).sample  # [1,3,H,W]
                     latent_img_tensor = (latent_img_tensor / 2 + 0.5).clamp(0, 1)
                     latent_img_tensor = latent_img_tensor.cpu().permute(0, 2, 3, 1)[0]
-                    latent_img = Image.fromarray((latent_img_tensor.numpy() * 255).astype('uint8'))
-            except Exception as e:
-                LOGS.append(f"⚠️ Latent preview decode failed: {e}")
+                    latent_img = Image.fromarray((latent_img_tensor.numpy() * 255).astype("uint8"))
+            except Exception:
                 latent_img = placeholder
+                LOGS.append("⚠️ Latent preview decode failed.")
 
             latent_gallery.append(latent_img)
-            yield None, latent_gallery, LOGS  # update Gradio with intermediate preview
+            all_latents.append(latents.cpu().clone())  # save current latent
 
-        # Save final latents to HF
-        latent_dict = {"latents": latents.cpu(), "prompt": prompt, "seed": seed}
+            # Yield intermediate preview every few steps
+            if i % max(1, len(pipe.scheduler.timesteps) // 10) == 0:
+                yield None, latent_gallery, LOGS
+
+        # Upload full series of latents
         try:
-            hf_url = upload_latents_to_hf(latent_dict, filename=f"latents_{seed}.pt")
-            LOGS.append(f"🔹 Latents uploaded: {hf_url}")
+            latent_dict = {
+                "latents_series": all_latents,
+                "prompt": prompt,
+                "seed": seed
+            }
+            hf_url = upload_latents_to_hf(latent_dict, filename=f"latents_series_{seed}.pt")
+            LOGS.append(f"🔹 All preview latents uploaded: {hf_url}")
         except Exception as e:
-            LOGS.append(f"⚠️ Failed to upload latents: {e}")
+            LOGS.append(f"⚠️ Failed to upload all preview latents: {e}")
 
     except Exception as e:
         LOGS.append(f"⚠️ Latent generation failed: {e}")
         latent_gallery.append(placeholder)
         yield None, latent_gallery, LOGS
 
-    # --- Final image: untouched standard pipeline ---
+    # --- Final image: completely untouched, uses standard pipeline ---
     try:
         output = pipe(
             prompt=prompt,
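
Note (not part of the diff): the rewritten preview loop above decodes the same latents tensor at every scheduler timestep (nothing updates it between iterations) and converts it to a PIL image via the pipeline VAE. Below is a minimal sketch of that latent-to-PIL conversion as a standalone helper, assuming only the diffusers-style pipe.vae used above; the helper name latent_to_pil is hypothetical and does not exist in the repository.

import torch
from PIL import Image

def latent_to_pil(pipe, latents):
    """Decode a [1, C, h, w] latent tensor into a PIL preview image.

    Mirrors the decode path in the loop above; depending on the pipeline,
    latents may also need scaling by 1 / pipe.vae.config.scaling_factor first.
    """
    with torch.no_grad():
        # Match the VAE dtype (the pipeline may run in fp16/bf16).
        decoded = pipe.vae.decode(latents.to(pipe.vae.dtype)).sample  # [1, 3, H, W]
    decoded = (decoded / 2 + 0.5).clamp(0, 1)                  # [-1, 1] -> [0, 1]
    array = decoded[0].permute(1, 2, 0).float().cpu().numpy()  # HWC, float32
    return Image.fromarray((array * 255).astype("uint8"))
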
@@ -652,6 +657,7 @@ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
         final_gallery.append(placeholder)
         latent_gallery.append(placeholder)
         yield placeholder, latent_gallery, LOGS
+
 # this is a stable version that can generate the final image and a noise-to-latent preview
 @spaces.GPU
 def generate_image0(prompt, height, width, steps, seed, guidance_scale=0.0):
@@ -663,46 +669,36 @@ def generate_image0(prompt, height, width, steps, seed, guidance_scale=0.0):
     latent_gallery = []
     final_gallery = []
 
-    # --- Try generating latent previews ---
+    # --- Generate latent previews in a loop ---
     try:
         latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
 
-        # Decode latent tensor to PIL for preview with robust fallbacks
-        latent_img = placeholder
-        try:
-            with torch.no_grad():
-                # 1️⃣ Try normal VAE decode if available
-                if hasattr(pipe, "vae") and hasattr(pipe.vae, "decode"):
-                    try:
-                        latent_img_tensor = pipe.vae.decode(latents).sample  # [1,3,H,W]
-                        latent_img_tensor = (latent_img_tensor / 2 + 0.5).clamp(0, 1)
-                        latent_img_tensor = latent_img_tensor.cpu().permute(0, 2, 3, 1)[0]
-                        latent_img = Image.fromarray((latent_img_tensor.numpy() * 255).astype('uint8'))
-                    except Exception as e1:
-                        LOGS.append(f"⚠️ VAE decode failed: {e1}")
-
-                # 2️⃣ Collapse first 3 channels if decode failed
-                if latent_img is placeholder and latents.shape[1] >= 3:
-                    ch = latents[0, :3, :, :]
-                    ch = (ch - ch.min()) / (ch.max() - ch.min() + 1e-8)
-                    latent_img = Image.fromarray((ch.permute(1, 2, 0).cpu().numpy() * 255).astype('uint8'))
-
-                # 3️⃣ Collapse all channels to mean -> replicate to RGB
-                if latent_img is placeholder:
-                    mean_ch = latents[0].mean(dim=0, keepdim=True)  # [1,H,W]
-                    mean_ch = (mean_ch - mean_ch.min()) / (mean_ch.max() - mean_ch.min() + 1e-8)
-                    latent_img = Image.fromarray(
-                        torch.cat([mean_ch]*3, dim=0).permute(1,2,0).cpu().numpy().astype('uint8')
-                    )
-
-        except Exception as e:
-            LOGS.append(f"⚠️ Latent to image conversion failed: {e}")
-            latent_img = placeholder
-
-        latent_gallery.append(latent_img)
-        yield None, latent_gallery, LOGS  # show preview immediately
-
-        # Save latents to HF for later testing
+        # Convert latents to float32 if necessary
+        if latents.dtype != torch.float32:
+            latents = latents.float()
+
+        # Loop for multiple previews before final image
+        num_previews = min(10, steps)  # show ~10 previews
+        preview_steps = torch.linspace(0, 1, num_previews)
+
+        for i, alpha in enumerate(preview_steps):
+            try:
+                with torch.no_grad():
+                    # Simple noise interpolation for preview (simulate denoising progress)
+                    preview_latent = latents * alpha + torch.randn_like(latents) * (1 - alpha)
+                    # Decode to PIL
+                    latent_img_tensor = pipe.vae.decode(preview_latent).sample  # [1,3,H,W]
+                    latent_img_tensor = (latent_img_tensor / 2 + 0.5).clamp(0, 1)
+                    latent_img_tensor = latent_img_tensor.cpu().permute(0, 2, 3, 1)[0]
+                    latent_img = Image.fromarray((latent_img_tensor.numpy() * 255).astype('uint8'))
+            except Exception as e:
+                LOGS.append(f"⚠️ Latent preview decode failed: {e}")
+                latent_img = placeholder
+
+            latent_gallery.append(latent_img)
+            yield None, latent_gallery, LOGS  # update Gradio with intermediate preview
+
+        # Save final latents to HF
         latent_dict = {"latents": latents.cpu(), "prompt": prompt, "seed": seed}
         try:
             hf_url = upload_latents_to_hf(latent_dict, filename=f"latents_{seed}.pt")
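
Note (not part of the diff): both functions finish by handing a dictionary to upload_latents_to_hf. Assuming that helper pushes the torch.save'd dict to a Hugging Face repo unchanged, the uploaded file can be pulled back and inspected as sketched below; the repo id is a hypothetical placeholder, and only the filename patterns and the keys "latents" / "latents_series", "prompt", and "seed" come from this diff.

import torch
from huggingface_hub import hf_hub_download

REPO_ID = "your-username/your-latents-repo"  # hypothetical; the real target lives in upload_latents_to_hf
seed = 42                                    # must match the seed used at generation time

local_path = hf_hub_download(repo_id=REPO_ID, filename=f"latents_{seed}.pt")
payload = torch.load(local_path, map_location="cpu")

print(payload["prompt"], payload["seed"])
single = payload.get("latents")               # one tensor (generate_image0)
series = payload.get("latents_series", [])    # list of tensors (generate_image)
print(getattr(single, "shape", None), len(series))
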
@@ -715,7 +711,7 @@ def generate_image0(prompt, height, width, steps, seed, guidance_scale=0.0):
         latent_gallery.append(placeholder)
         yield None, latent_gallery, LOGS
 
-    # --- Final image: completely untouched, uses standard pipeline ---
+    # --- Final image: untouched standard pipeline ---
     try:
         output = pipe(
             prompt=prompt,
@@ -736,7 +732,6 @@ def generate_image0(prompt, height, width, steps, seed, guidance_scale=0.0):
         final_gallery.append(placeholder)
         latent_gallery.append(placeholder)
         yield placeholder, latent_gallery, LOGS
-    # this version generates well for the final image and gives a tensor back for the latent
 
 
 
@@ -744,169 +739,7 @@ def generate_image0(prompt, height, width, steps, seed, guidance_scale=0.0):
 
 
 
-@spaces.GPU
-def generate_image_backup(prompt, height, width, steps, seed, guidance_scale=0.0, return_latents=False):
-    """
-    Robust dual pipeline:
-    - Advanced latent generation first
-    - Fallback to standard pipeline if latent fails
-    - Always returns final image
-    - Returns gallery (latents or final image) and logs
-    """
-
-    LOGS = []
-    image = None
-    latents = None
-    gallery = []
-
-    # Keep a placeholder original image (white) in case everything fails
-    original_image = Image.new("RGB", (width, height), color=(255, 255, 255))
-
-    try:
-        generator = torch.Generator(device).manual_seed(int(seed))
-
-        # -------------------------------
-        # Try advanced latent generation
-        # -------------------------------
-        try:
-            batch_size = 1
-            num_channels_latents = getattr(pipe.unet, "in_channels", None)
-            if num_channels_latents is None:
-                raise AttributeError("pipe.unet.in_channels not found, fallback to standard pipeline")
-
-            latents = pipe.prepare_latents(
-                batch_size=batch_size,
-                num_channels=num_channels_latents,
-                height=height,
-                width=width,
-                dtype=torch.float32,
-                device=device,
-                generator=generator
-            )
-            LOGS.append(f"✅ Latents prepared: {latents.shape}")
-
-            output = pipe(
-                prompt=prompt,
-                height=height,
-                width=width,
-                num_inference_steps=steps,
-                guidance_scale=guidance_scale,
-                generator=generator,
-                latents=latents
-            )
-            image = output.images[0]
-            gallery = [image] if image else []
-
-            LOGS.append("✅ Advanced latent generation succeeded.")
-
-        # -------------------------------
-        # Fallback to standard pipeline
-        # -------------------------------
-        except Exception as e_latent:
-            LOGS.append(f"⚠️ Advanced latent generation failed: {e_latent}")
-            LOGS.append("🔁 Falling back to standard pipeline...")
-
-            try:
-                output = pipe(
-                    prompt=prompt,
-                    height=height,
-                    width=width,
-                    num_inference_steps=steps,
-                    guidance_scale=guidance_scale,
-                    generator=generator
-                )
-                image = output.images[0]
-                gallery = [image] if image else []
-                LOGS.append("✅ Standard pipeline generation succeeded.")
-            except Exception as e_standard:
-                LOGS.append(f"❌ Standard pipeline generation failed: {e_standard}")
-                image = original_image  # Always return some image
-                gallery = [image]
-
-        # -------------------------------
-        # Return all 3 outputs
-        # -------------------------------
-        return image, gallery, LOGS
-
-    except Exception as e:
-        LOGS.append(f"❌ Inference failed entirely: {e}")
-        return original_image, [original_image], LOGS
-
-# ============================================================
-# UI
-# ============================================================
-
-# Utility: scan local HF cache for safetensors in a repo folder name
-def list_loras_from_repo(repo_id):
-    """
-    Attempts to find safetensors inside HF cache directory for repo_id.
-    This only scans local cache; it does NOT download anything.
-
-    Returns:
-        A list of strings suitable for showing in the dropdown. Prefer returning
-        paths relative to the repo root (e.g. "NSFW/doggystyle_pov.safetensors") so that
-        pipe.load_lora_weights(repo_id, weight_name=that_path) works for nested files.
-        If a relative path can't be determined, returns absolute cached file paths.
-    """
-    if not repo_id:
-        return []
-
-    safe_list = []
-
-    # Candidate cache roots
-    hf_cache = os.path.expanduser("~/.cache/huggingface/hub")
-    alt_cache = "/home/user/.cache/huggingface/hub"
-    candidates = [hf_cache, alt_cache]
-
-    # Normalize repo variants to search for in path
-    owner_repo = repo_id.replace("/", "_")
-    owner_repo_dash = repo_id.replace("/", "-")
-    owner_repo_double = repo_id.replace("/", "--")
-
-    # Walk caches and collect safetensors
-    for root_cache in candidates:
-        if not os.path.exists(root_cache):
-            continue
-        for dirpath, dirnames, filenames in os.walk(root_cache):
-            for f in filenames:
-                if not f.endswith(".safetensors"):
-                    continue
-                full_path = os.path.join(dirpath, f)
-
-                # try to find a repo-root-like substring in dirpath
-                chosen_base = None
-                for pattern in (owner_repo_double, owner_repo_dash, owner_repo):
-                    idx = dirpath.find(pattern)
-                    if idx != -1:
-                        chosen_base = dirpath[: idx + len(pattern)]
-                        break
-
-                # fallback: look for the repo folder name (last component) e.g., "ZImageLora"
-                if chosen_base is None:
-                    repo_tail = repo_id.split("/")[-1]
-                    idx2 = dirpath.find(repo_tail)
-                    if idx2 != -1:
-                        chosen_base = dirpath[: idx2 + len(repo_tail)]
-
-                # If we found a base that looks like the cached repo root, compute relative path
-                if chosen_base:
-                    try:
-                        rel = os.path.relpath(full_path, chosen_base)
-                        # If relpath goes up (starts with ..) then prefer full_path
-                        if rel and not rel.startswith(".."):
-                            # Normalize to forward slashes for HF repo weight_name usage
-                            rel_normalized = rel.replace(os.sep, "/")
-                            safe_list.append(rel_normalized)
-                            continue
-                    except Exception:
-                        pass
-
-                # Otherwise append absolute path (last resort)
-                safe_list.append(full_path)
-
-    # remove duplicates and sort
-    safe_list = sorted(list(dict.fromkeys(safe_list)))
-    return safe_list
+
 
 
 with gr.Blocks(title="Z-Image-Turbo") as demo:
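
Note (not part of the diff): the removed list_loras_from_repo walked ~/.cache/huggingface/hub by hand to find cached .safetensors files and turn them into repo-relative weight_name values. If equivalent behaviour is needed again, huggingface_hub.scan_cache_dir() exposes the same cache through a supported API; the sketch below is an untested alternative based on that API, not code from this commit.

from huggingface_hub import scan_cache_dir

def list_cached_safetensors(repo_id):
    """Return snapshot-relative .safetensors paths cached locally for repo_id."""
    if not repo_id:
        return []
    found = set()
    for repo in scan_cache_dir().repos:
        if repo.repo_id != repo_id:
            continue
        for revision in repo.revisions:
            for f in revision.files:
                if f.file_name.endswith(".safetensors"):
                    # Keep the path relative to the snapshot root so it can be
                    # passed as weight_name to pipe.load_lora_weights(repo_id, ...).
                    found.add(f.file_path.relative_to(revision.snapshot_path).as_posix())
    return sorted(found)
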
 