Spaces:

rahul7star
/

Image2Video

Paused

App Files Files Community

rahul7star committed 12 days ago

Commit

e7a9d63

verified ·

1 Parent(s): 2aef3fc

Create app_quant_latent1.py

Browse files

Files changed (1) hide show

app_quant_latent1.py +614 -0

app_quant_latent1.py ADDED Viewed

	@@ -0,0 +1,614 @@

+import torch
+import spaces
+import gradio as gr
+import sys
+import platform
+import diffusers
+import transformers
+import psutil
+import os
+import time
+from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
+from diffusers import ZImagePipeline, AutoModel
+from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
+latent_history = []  # module-level list; nothing in the visible code reads or writes it
+# ============================================================
+# LOGGING BUFFER
+# ============================================================
+# Text buffer that log() appends to: one message per line.
+LOGS = ""
+def log(msg):
+    global LOGS
+    print(msg)
+    LOGS += msg + "\n"
+    return msg
+# ============================================================
+# SYSTEM METRICS — LIVE GPU + CPU MONITORING
+# ============================================================
+def log_system_stats(tag=""):
+    try:
+        log(f"\n===== 🔥 SYSTEM STATS {tag} =====")
+        # ============= GPU STATS =============
+        if torch.cuda.is_available():
+            allocated = torch.cuda.memory_allocated(0) / 1e9
+            reserved = torch.cuda.memory_reserved(0) / 1e9
+            total = torch.cuda.get_device_properties(0).total_memory / 1e9
+            free = total - allocated
+            log(f"💠 GPU Total     : {total:.2f} GB")
+            log(f"💠 GPU Allocated : {allocated:.2f} GB")
+            log(f"💠 GPU Reserved  : {reserved:.2f} GB")
+            log(f"💠 GPU Free      : {free:.2f} GB")
+        # ============= CPU STATS ============
+        cpu = psutil.cpu_percent()
+        ram_used = psutil.virtual_memory().used / 1e9
+        ram_total = psutil.virtual_memory().total / 1e9
+        log(f"🧠 CPU Usage     : {cpu}%")
+        log(f"🧠 RAM Used      : {ram_used:.2f} GB / {ram_total:.2f} GB")
+    except Exception as e:
+        log(f"⚠️ Failed to log system stats: {e}")
+# ============================================================
+# ENVIRONMENT INFO
+# ============================================================
+# Startup banner followed by interpreter, platform and library versions.
+log("===================================================")
+log("🔍 Z-IMAGE-TURBO DEBUGGING + LIVE METRIC LOGGER")
+log("===================================================\n")
+log(f"📌 PYTHON VERSION       : {sys.version.replace(chr(10),' ')}")  # chr(10) is "\n": keeps the version on one line
+log(f"📌 PLATFORM             : {platform.platform()}")
+log(f"📌 TORCH VERSION        : {torch.__version__}")
+log(f"📌 TRANSFORMERS VERSION : {transformers.__version__}")
+log(f"📌 DIFFUSERS VERSION    : {diffusers.__version__}")
+log(f"📌 CUDA AVAILABLE       : {torch.cuda.is_available()}")
+log_system_stats("AT STARTUP")
+# Hard stop at import time when no CUDA device is visible.
+if not torch.cuda.is_available():
+    raise RuntimeError("❌ CUDA Required")
+device = "cuda"  # used as device_map for the loaders and as the pipeline / generator device
+gpu_id = 0  # not referenced anywhere in the visible code
+# ============================================================
+# MODEL SETTINGS
+# ============================================================
+model_cache = "./weights/"  # cache_dir for the transformer and text-encoder loads
+model_id = "Tongyi-MAI/Z-Image-Turbo"  # repo id passed to every from_pretrained call in this file
+torch_dtype = torch.bfloat16  # load dtype and 4-bit compute dtype
+USE_CPU_OFFLOAD = False  # only logged; no visible code branches on it
+# Echo the configuration so it shows up in the log buffer.
+log("\n===================================================")
+log("🧠 MODEL CONFIGURATION")
+log("===================================================")
+log(f"Model ID              : {model_id}")
+log(f"Model Cache Directory : {model_cache}")
+log(f"torch_dtype           : {torch_dtype}")
+log(f"USE_CPU_OFFLOAD       : {USE_CPU_OFFLOAD}")
+log_system_stats("BEFORE TRANSFORMER LOAD")
+# ============================================================
+# FUNCTION TO CONVERT LATENTS TO IMAGE
+# ============================================================
+def latent_to_image(latent):
+    try:
+        img_tensor = pipe.vae.decode(latent)
+        img_tensor = (img_tensor / 2 + 0.5).clamp(0, 1)
+        pil_img = T.ToPILImage()(img_tensor[0])
+        return pil_img
+    except Exception as e:
+        log(f"⚠️ Failed to decode latent: {e}")
+        return None
+# ============================================================
+# SAFE TRANSFORMER INSPECTION
+# ============================================================
+def inspect_transformer(model, name):
+    log(f"\n🔍 Inspecting {name}")
+    try:
+        candidates = ["transformer_blocks", "blocks", "layers", "encoder", "model"]
+        blocks = None
+        for attr in candidates:
+            if hasattr(model, attr):
+                blocks = getattr(model, attr)
+                break
+        if blocks is None:
+            log(f"⚠️ No block structure found in {name}")
+            return
+        if hasattr(blocks, "__len__"):
+            log(f"Total Blocks = {len(blocks)}")
+        else:
+            log("⚠️ Blocks exist but are not iterable")
+        for i in range(min(10, len(blocks) if hasattr(blocks, "__len__") else 0)):
+            log(f"Block {i} = {blocks[i].__class__.__name__}")
+    except Exception as e:
+        log(f"⚠️ Transformer inspect error: {e}")
+# ============================================================
+# LOAD TRANSFORMER — WITH LIVE STATS
+# ============================================================
+# Load the diffusion transformer in 4-bit NF4, logging memory before and after.
+log("\n===================================================")
+log("🔧 LOADING TRANSFORMER BLOCK")
+log("===================================================")
+log("📌 Logging memory before load:")
+log_system_stats("START TRANSFORMER LOAD")
+try:
+    # diffusers-flavoured bitsandbytes config: 4-bit NF4 with double quantization,
+    # computing in torch_dtype.
+    quant_cfg = DiffusersBitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch_dtype,
+        bnb_4bit_use_double_quant=True,
+    )
+    transformer = AutoModel.from_pretrained(
+        model_id,
+        cache_dir=model_cache,
+        subfolder="transformer",
+        quantization_config=quant_cfg,
+        torch_dtype=torch_dtype,
+        device_map=device,
+    )
+    log("✅ Transformer loaded successfully.")
+except Exception as e:
+    # Best effort: a failed load is logged and leaves transformer = None.
+    log(f"❌ Transformer load failed: {e}")
+    transformer = None
+log_system_stats("AFTER TRANSFORMER LOAD")
+# Only inspect when the load produced a model.
+if transformer:
+    inspect_transformer(transformer, "Transformer")
+# ============================================================
+# LOAD TEXT ENCODER
+# ============================================================
+# Load the text encoder in 4-bit NF4, logging memory before and after.
+log("\n===================================================")
+log("🔧 LOADING TEXT ENCODER")
+log("===================================================")
+log_system_stats("START TEXT ENCODER LOAD")
+try:
+    # transformers-flavoured bitsandbytes config with the same settings as the
+    # transformer load above.
+    quant_cfg2 = TransformersBitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch_dtype,
+        bnb_4bit_use_double_quant=True,
+    )
+    # NOTE(review): AutoModel here is the one imported from diffusers — confirm it
+    # resolves the text_encoder subfolder to the intended transformers class.
+    text_encoder = AutoModel.from_pretrained(
+        model_id,
+        cache_dir=model_cache,
+        subfolder="text_encoder",
+        quantization_config=quant_cfg2,
+        torch_dtype=torch_dtype,
+        device_map=device,
+    )
+    log("✅ Text encoder loaded successfully.")
+except Exception as e:
+    # Best effort: a failed load is logged and leaves text_encoder = None.
+    log(f"❌ Text encoder load failed: {e}")
+    text_encoder = None
+log_system_stats("AFTER TEXT ENCODER LOAD")
+# Only inspect when the load produced a model.
+if text_encoder:
+    inspect_transformer(text_encoder, "Text Encoder")
+# ============================================================
+# BUILD PIPELINE
+# ============================================================
+# Assemble the pipeline around the pre-loaded transformer and text encoder.
+log("\n===================================================")
+log("🔧 BUILDING PIPELINE")
+log("===================================================")
+log_system_stats("START PIPELINE BUILD")
+try:
+    # transformer / text_encoder may be None here if their loads failed above.
+    pipe = ZImagePipeline.from_pretrained(
+        model_id,
+        transformer=transformer,
+        text_encoder=text_encoder,
+        torch_dtype=torch_dtype,
+        attn_implementation="kernels-community/vllm-flash-attn3",  # NOTE(review): confirm the pipeline loader honours this kwarg
+    )
+    pipe.to(device)
+    log("✅ Pipeline built successfully.")
+except Exception as e:
+    # Best effort: a failed build is logged and leaves pipe = None.
+    log(f"❌ Pipeline build failed: {e}")
+    pipe = None
+log_system_stats("AFTER PIPELINE BUILD")
+from PIL import Image
+import torch
+def safe_generate_with_latents(
+transformer,
+vae,
+text_encoder,
+tokenizer,
+scheduler,
+pipe,
+prompt,
+height,
+width,
+steps,
+guidance_scale,
+negative_prompt,
+num_images_per_prompt,
+generator,
+cfg_normalization,
+cfg_truncation,
+max_sequence_length,
+):
+ try:
+ latents_or_images = generate(
+  transformer=transformer,
+  vae=vae,
+  text_encoder=text_encoder,
+  tokenizer=tokenizer,
+  scheduler=scheduler,
+  prompt=prompt,
+  height=height,
+  width=width,
+  num_inference_steps=steps,
+  guidance_scale=guidance_scale,
+  negative_prompt=negative_prompt,
+  num_images_per_prompt=num_images_per_prompt,
+  generator=generator,
+  cfg_normalization=cfg_normalization,
+  cfg_truncation=cfg_truncation,
+  max_sequence_length=max_sequence_length,
+  output_type="latent",  # IMPORTANT
+  )
+  return latents_or_images, None
+ except Exception as e:
+    return None, e
+def safe_get_latents(pipe, height, width, generator, device, LOGS):
+    """
+    Attempts multiple ways to get latents.
+    Returns a valid tensor even if pipeline hides UNet.
+    """
+    # Try official prepare_latents
+    try:
+        if hasattr(pipe, "unet") and hasattr(pipe.unet, "in_channels"):
+            num_channels = pipe.unet.in_channels
+            latents = pipe.prepare_latents(
+                batch_size=1,
+                num_channels=num_channels,
+                height=height,
+                width=width,
+                dtype=torch.float32,
+                device=device,
+                generator=generator
+            )
+            LOGS.append("✅ Latents extracted using official prepare_latents.")
+            return latents
+    except Exception as e:
+        LOGS.append(f"⚠️ Official latent extraction failed: {e}")
+    # Try hidden internal attribute
+    try:
+        if hasattr(pipe, "_default_latents"):
+            LOGS.append("⚠️ Using hidden _default_latents.")
+            return pipe._default_latents
+    except:
+        pass
+    # Fallback: raw Gaussian tensor
+    try:
+        LOGS.append("⚠️ Using raw Gaussian latents fallback.")
+        return torch.randn(
+            (1, 4, height // 8, width // 8),
+            generator=generator,
+            device=device,
+            dtype=torch.float32
+        )
+    except Exception as e:
+        LOGS.append(f"⚠️ Gaussian fallback failed: {e}")
+    LOGS.append("❗ Using CPU hard fallback latents.")
+    return torch.randn((1, 4, height // 8, width // 8))
+# --------------------------
+# Main generation function
+# --------------------------
+@spaces.GPU
+def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
+    LOGS = []
+    latents = None
+    image = None
+    gallery = []
+    # placeholder image if all fails
+    placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
+    print(prompt)
+    try:
+        generator = torch.Generator(device).manual_seed(int(seed))
+        # -------------------------------
+        # Try advanced latent extraction
+        # -------------------------------
+        try:
+            latents, latent_err = safe_generate_with_latents(
+               transformer=transformer,
+               vae=vae,
+               text_encoder=text_encoder,
+               tokenizer=tokenizer,
+               scheduler=scheduler,
+               pipe=pipe,
+               prompt=prompt,
+               height=height,
+               width=width,
+               steps=steps,
+               guidance_scale=guidance_scale,
+               negative_prompt="",
+               num_images_per_prompt=1,
+               generator=generator,
+               cfg_normalization=False,
+               cfg_truncation=1.0,
+               max_sequence_length=4096,
+               )
+            if latent_err is None:
+                 log("✅ Latent generator succeeded.")
+        try:
+          # Decode latents to image
+          shift_factor = getattr(vae.config, "shift_factor", 0.0) or 0.0
+          dec = (latents.to(vae.dtype) / vae.config.scaling_factor) + shift_factor
+          image = vae.decode(dec, return_dict=False)[0]
+          image = (image / 2 + 0.5).clamp(0, 1)
+          image = image.cpu().permute(0, 2, 3, 1).numpy()
+          image = (image * 255).round().astype("uint8")
+          from PIL import Image
+          image = Image.fromarray(image[0])
+          log("🟢 Final image decoded from latent generator.")
+          return image, latents, LOGS
+        except Exception as decode_error:
+        log(f"⚠️ Latent decode failed: {decode_error}")
+        log("🔁 Falling back to standard pipeline...")
+            image = output.images[0]
+            gallery = [image]
+            LOGS.append("✅ Advanced latent pipeline succeeded.")
+        except Exception as e:
+            LOGS.append(f"⚠️ Latent mode failed: {e}")
+            LOGS.append("🔁 Switching to standard pipeline...")
+            image = placeholder
+            gallery = [image]
+        # ========================================================== # 🟩 STANDARD PIPELINE FALLBACK (Never fails) # ==========================================================
+        try:
+                output = pipe(
+                    prompt=prompt,
+                    height=height,
+                    width=width,
+                    num_inference_steps=steps,
+                    guidance_scale=guidance_scale,
+                    generator=generator,
+                )
+                image = output.images[0]
+                gallery = [image]
+                LOGS.append("✅ Standard pipeline succeeded.")
+        except Exception as e2:
+                LOGS.append(f"❌ Standard pipeline failed: {e2}")
+                image = placeholder
+                gallery = [image]
+        return image, gallery, LOGS
+    except Exception as e:
+        LOGS.append(f"❌ Total failure: {e}")
+        return placeholder, [placeholder], LOGS
+# --------------------------
+# Helper: Safe latent extractor
+# --------------------------
+def safe_get_latents0(pipe, height, width, generator, device, LOGS):
+    """
+    Attempts multiple ways to get latents.
+    Returns a valid tensor even if pipeline hides UNet.
+    """
+    # Try official prepare_latents
+    try:
+        if hasattr(pipe, "unet") and hasattr(pipe.unet, "in_channels"):
+            num_channels = pipe.unet.in_channels
+            latents = pipe.prepare_latents(
+                batch_size=1,
+                num_channels=num_channels,
+                height=height,
+                width=width,
+                dtype=torch.float32,
+                device=device,
+                generator=generator
+            )
+            LOGS.append("✅ Latents extracted using official prepare_latents.")
+            return latents
+    except Exception as e:
+        LOGS.append(f"⚠️ Official latent extraction failed: {e}")
+    # Try hidden internal attribute
+    try:
+        if hasattr(pipe, "_default_latents"):
+            LOGS.append("⚠️ Using hidden _default_latents.")
+            return pipe._default_latents
+    except:
+        pass
+    # Fallback: raw Gaussian tensor
+    try:
+        LOGS.append("⚠️ Using raw Gaussian latents fallback.")
+        return torch.randn(
+            (1, 4, height // 8, width // 8),
+            generator=generator,
+            device=device,
+            dtype=torch.float32
+        )
+    except Exception as e:
+        LOGS.append(f"⚠️ Gaussian fallback failed: {e}")
+    LOGS.append("❗ Using CPU hard fallback latents.")
+    return torch.randn((1, 4, height // 8, width // 8))
+# --------------------------
+# Main generation function
+# --------------------------
+@spaces.GPU
+def generate_image0(prompt, height, width, steps, seed, guidance_scale=0.0):
+    LOGS = []
+    latents = None
+    image = None
+    gallery = []
+    # placeholder image if all fails
+    placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
+    print(prompt)
+    try:
+        generator = torch.Generator(device).manual_seed(int(seed))
+        # -------------------------------
+        # Try advanced latent extraction
+        # -------------------------------
+        try:
+            latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
+            output = pipe(
+                prompt=prompt,
+                height=height,
+                width=width,
+                num_inference_steps=steps,
+                guidance_scale=guidance_scale,
+                generator=generator,
+                latents=latents
+            )
+            image = output.images[0]
+            gallery = [image]
+            LOGS.append("✅ Advanced latent pipeline succeeded.")
+        except Exception as e:
+            LOGS.append(f"⚠️ Latent mode failed: {e}")
+            LOGS.append("🔁 Switching to standard pipeline...")
+            try:
+                output = pipe(
+                    prompt=prompt,
+                    height=height,
+                    width=width,
+                    num_inference_steps=steps,
+                    guidance_scale=guidance_scale,
+                    generator=generator,
+                )
+                image = output.images[0]
+                gallery = [image]
+                LOGS.append("✅ Standard pipeline succeeded.")
+            except Exception as e2:
+                LOGS.append(f"❌ Standard pipeline failed: {e2}")
+                image = placeholder
+                gallery = [image]
+        return image, gallery, LOGS
+    except Exception as e:
+        LOGS.append(f"❌ Total failure: {e}")
+        return placeholder, [placeholder], LOGS
+# ============================================================
+# UI
+# ============================================================
+# Gradio front end: inputs in the left column, outputs in the right column.
+with gr.Blocks(title="Z-Image- experiment - dont run")as demo:
+  gr.Markdown("# **🚀 do not run Z-Image-Turbo — Final Image & Latents**")
+  with gr.Row():
+    with gr.Column(scale=1):
+        prompt = gr.Textbox(label="Prompt", value="boat in Ocean")
+        height = gr.Slider(256, 2048, value=1024, step=8, label="Height")
+        width = gr.Slider(256, 2048, value=1024, step=8, label="Width")
+        steps = gr.Slider(1, 50, value=20, step=1, label="Inference Steps")
+        seed = gr.Number(value=42, label="Seed")
+        run_btn = gr.Button("Generate Image")
+    with gr.Column(scale=1):
+        final_image = gr.Image(label="Final Image")
+        latent_gallery = gr.Gallery(
+           label="Latent Steps",
+                columns=4,
+              height=256,
+             preview=True
+              )
+        logs_box = gr.Textbox(label="Logs", lines=15)
+    # generate_image returns (image, gallery, logs), matching the three outputs.
+    # guidance_scale is not wired to a control, so its default (0.0) is used.
+    run_btn.click(
+      generate_image,
+      inputs=[prompt, height, width, steps, seed],
+      outputs=[final_image, latent_gallery, logs_box]
+     )
+# Launches at import time; there is no __main__ guard.
+demo.launch()