"""
ShapeWords: Guiding Text-to-Image Synthesis with 3D Shape-Aware Prompts
=======================================================================

A Gradio web interface for the ShapeWords paper, allowing users to generate
images guided by 3D shape information.

Author: Melinos Averkiou
Date: 24 March 2025
Version: 1.5

Paper: "ShapeWords: Guiding Text-to-Image Synthesis with 3D Shape-Aware Prompts"
arXiv: https://arxiv.org/abs/2412.02912
Project Page: https://lodurality.github.io/shapewords/

Citation:
@misc{petrov2024shapewords,
    title={ShapeWords: Guiding Text-to-Image Synthesis with 3D Shape-Aware Prompts},
    author={Dmitry Petrov and Pradyumn Goyal and Divyansh Shivashok and Yuanming Tao and Melinos Averkiou and Evangelos Kalogerakis},
    year={2024},
    eprint={2412.02912},
    archivePrefix={arXiv},
    primaryClass={cs.CV},
    url={https://arxiv.org/abs/2412.02912},
}

License: MIT License

Usage:
    python app.py [--share]

This demo allows users to:
1. Select a 3D object category from ShapeNetCore
2. Choose a specific 3D shape using a slider or the navigation buttons (including a random shape button)
3. Enter a text prompt or pick a random one
4. Generate images guided by the selected 3D shape and the text prompt

The code is structured as a class and is compatible with Hugging Face ZeroGPU deployment.
"""

import os

# Only for Hugging Face hosting - keep the Hugging Face cache on persistent storage so the
# safetensors are not downloaded again every time the demo sleeps and wakes up.
# This has to run BEFORE gradio/diffusers are imported: huggingface_hub resolves HF_HOME into its
# cache paths when it is first imported, so setting the variable afterwards has no effect on where
# models are cached in this process.
# The /data check keeps local runs (no persistent storage mounted) on the default cache location.
if os.path.isdir('/data'):
    os.environ['HF_HOME'] = '/data/.huggingface'

import sys
import numpy as np
import torch
import gradio as gr
from PIL import Image, ImageFont, ImageDraw
from diffusers.utils import load_image
from diffusers import DPMSolverMultistepScheduler, StableDiffusionPipeline
import gdown
import argparse
import random
import spaces # for Hugging Face ZeroGPU deployment
import re
import plotly.graph_objects as go
from numpy.lib.user_array import container
import shutil

class ShapeWordsDemo:
    # Constants

    # Human-readable category name -> 8-digit category ID string.
    # The ID is what the data files are named after (e.g. "<id>_pb_embs.npz"),
    # while the name is what is shown to the user and inserted into prompts.
    NAME2CAT = {
        "chair": "03001627", "table": "04379243", "jar": "03593526", "skateboard": "04225987",
        "car": "02958343", "bottle": "02876657", "tower": "04460130", "bookshelf": "02871439",
        "camera": "02942699", "airplane": "02691156", "laptop": "03642806", "basket": "02801938",
        "sofa": "04256520", "knife": "03624134", "can": "02946921", "rifle": "04090263",
        "train": "04468005", "pillow": "03938244", "lamp": "03636649", "trash bin": "02747177",
        "mailbox": "03710193", "watercraft": "04530566", "motorbike": "03790512",
        "dishwasher": "03207941", "bench": "02828884", "pistol": "03948459", "rocket": "04099429",
        "loudspeaker": "03691459", "file cabinet": "03337140", "bag": "02773838",
        "cabinet": "02933112", "bed": "02818832", "birdhouse": "02843684", "display": "03211117",
        "piano": "03928116", "earphone": "03261776", "telephone": "04401088", "stove": "04330267",
        "microphone": "03759954", "bus": "02924116", "mug": "03797390", "remote": "04074963",
        "bathtub": "02808440", "bowl": "02880940", "keyboard": "03085013", "guitar": "03467517",
        "washer": "04554684", "bicycle": "02834778", "faucet": "03325088", "printer": "04004475",
        "cap": "02954340", "phone": "02992529", "clock": "03046257", "helmet": "03513137",
        "microwave": "03761084", "plant": "03991062"
    }

    # Prompt templates offered by the "random prompt" feature.
    # The literal [CATEGORY] placeholder is replaced with the selected category
    # name in generate_images before the prompt is sent to the pipeline.
    PREDEFINED_PROMPTS = [
        'a low poly 3d rendering of a [CATEGORY]',
        'an aquarelle drawing of a [CATEGORY]',
        'a photo of a [CATEGORY] on a beach',
        'a charcoal drawing of a [CATEGORY]',
        'a Hieronymus Bosch painting of a [CATEGORY]',
        'a [CATEGORY] under a tree',
        'A Kazimir Malevich painting of a [CATEGORY]',
        'a vector graphic of a [CATEGORY]',
        'a Claude Monet painting of a [CATEGORY]',
        'a Salvador Dali painting of a [CATEGORY]',
        'an Art Deco poster of a [CATEGORY]'
    ]

    def __init__(self):
        # Initialize class attributes
        self.pipeline = None
        self.shape2clip_model = None
        self.text_encoder = None
        self.tokenizer = None
        self.category_embeddings = {}
        self.category_counts = {}
        self.available_categories = []
        self.shape_thumbnail_cache = {}  # Cache for shape thumbnails
        self.CAT2NAME = {v: k for k, v in self.NAME2CAT.items()}
        self.category_point_clouds = {}

        # Initialize all models and data
        self.initialize_models()

    def initialize_models(self):
        """Fetch the Shape2CLIP code and weights and build every model the demo uses.

        Steps, in order: clone the paper code if its model file is missing,
        create the Stable Diffusion pipeline with a DPM-Solver scheduler,
        locate (or download) the Shape2CLIP checkpoint, load it, and finally
        scan the disk for available shape embeddings.
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device} in initialize_models")

        # Clone the Shape2CLIP code when the model definition file is absent;
        # any partial checkout is removed first so the clone starts clean
        models_file = "shapewords_paper_code/geometry_guidance_models.py"
        if not os.path.exists(models_file):
            shutil.rmtree("shapewords_paper_code/", ignore_errors=True)
            print("Loading models file")
            os.system("git clone https://github.com/lodurality/shapewords_paper_code.git")

        # The import must happen after the clone above
        sys.path.append("./shapewords_paper_code")
        from shapewords_paper_code.geometry_guidance_models import Shape2CLIP

        # Half precision on GPU, full precision on CPU
        weights_dtype = torch.float16 if device.type == "cuda" else torch.float32
        self.pipeline = StableDiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-1-base",
            torch_dtype=weights_dtype
        )
        pipe = self.pipeline
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(
            pipe.scheduler.config,
            algorithm_type="sde-dpmsolver++"
        )

        # The text encoder and tokenizer are reused for prompt guidance
        self.text_encoder = pipe.text_encoder
        self.tokenizer = pipe.tokenizer

        # Candidate checkpoint locations: next to app.py, then the /data/
        # directory used with Hugging Face persistent storage
        candidate_checkpoints = (
            "./projection_model-0920192.pth",
            "/data/projection_model-0920192.pth",
        )
        checkpoint_path = next(
            (candidate for candidate in candidate_checkpoints if os.path.exists(candidate)),
            None
        )

        if checkpoint_path is not None:
            print(f"Found Shape2CLIP checkpoint at: {checkpoint_path}")
        else:
            # Not found anywhere: download into the same directory as app.py
            checkpoint_path = "projection_model-0920192.pth"
            print("Downloading Shape2CLIP model checkpoint...")
            gdown.download("https://drive.google.com/uc?id=1nvEXnwMpNkRts6rxVqMZt8i9FZ40KjP7", checkpoint_path, quiet=False)
            print("Download complete")

        # Build Shape2CLIP and load its weights
        self.shape2clip_model = Shape2CLIP(depth=6, drop_path_rate=0.1, pb_dim=384)
        checkpoint_weights = torch.load(checkpoint_path, map_location=device)
        self.shape2clip_model.load_state_dict(checkpoint_weights)
        self.shape2clip_model.eval()

        # Discover which categories have embeddings on disk
        self.scan_available_embeddings()

    def scan_available_embeddings(self):
        self.available_categories = []
        self.category_counts = {}

        # Try to find PointBert embeddings for all 55 ShapeNetCore shape categories
        for category, cat_id in self.NAME2CAT.items():
            possible_filenames = [
                f"{cat_id}_pb_embs.npz",
                f"embeddings/{cat_id}_pb_embs.npz", 
                f"/data/shapenet_pointbert_tokens/{cat_id}_pb_embs.npz" # if using Hugging Face persistent storage look in a /data/shapenet_pointbert_tokens directory
            ]

            found_file = None
            for filename in possible_filenames:
                if os.path.exists(filename):
                    found_file = filename
                    break

            if found_file:
                try:
                    pb_data = np.load(found_file)
                    if 'ids' in pb_data:
                        count = len(pb_data['ids'])
                    else:
                        # Try to infer the correct keys
                        keys = list(pb_data.keys())
                        if len(keys) >= 1:
                            count = len(pb_data[keys[0]])
                        else:
                            count = 0

                    if count > 0:
                        self.available_categories.append(category)
                        self.category_counts[category] = count
                        print(f"Found {count} embeddings for category '{category}'")
                except Exception as e:
                    print(f"Error loading embeddings for {category}: {e}")

        # Sort categories alphabetically
        self.available_categories.sort()

        print(f"Found {len(self.available_categories)} categories with embeddings")
        print(f"Available categories: {', '.join(self.available_categories)}")
        
        # No embeddings found for any category - DEMO CANNOT RUN - but still load the interface with a default placeholder category, an error will be displayed when trying to generate images
        if not self.available_categories:
            self.available_categories = ["chair"]  # Fallback
            self.category_counts["chair"] = 50     # Default value

    def load_category_embeddings(self, category):
        if category in self.category_embeddings:
            return self.category_embeddings[category]

        if category not in self.NAME2CAT:
            return None, []

        cat_id = self.NAME2CAT[category]

        # Check for different possible embedding filenames and locations
        possible_filenames = [
            f"{cat_id}_pb_embs.npz",
            f"embeddings/{cat_id}_pb_embs.npz",
            f"/data/shapenet_pointbert_tokens/{cat_id}_pb_embs.npz" # if using Hugging Face persistent storage look in a /data/shapenet_pointbert_tokens directory
        ]

        # Find the first existing file
        pb_emb_filename = None
        for filename in possible_filenames:
            if os.path.exists(filename):
                pb_emb_filename = filename
                print(f"Found embeddings file: {pb_emb_filename}")
                break

        if pb_emb_filename is None:
            print(f"No embeddings found for {category}")
            return None, []

        # Load embeddings
        try:
            print(f"Loading embeddings from {pb_emb_filename}...")
            pb_data = np.load(pb_emb_filename)

            # Check for different key names in the NPZ file
            if 'ids' in pb_data and 'embs' in pb_data:
                pb_dict = dict(zip(pb_data['ids'], pb_data['embs']))
            else:
                # Try to infer the correct keys
                keys = list(pb_data.keys())
                if len(keys) >= 2:
                    # Assume first key is for IDs and second is for embeddings
                    pb_dict = dict(zip(pb_data[keys[0]], pb_data[keys[1]]))
                else:
                    print("Unexpected embedding file format")
                    return None, []

            all_ids = sorted(list(pb_dict.keys()))
            print(f"Loaded {len(all_ids)} shape embeddings for {category}")

            # Cache the results
            self.category_embeddings[category] = (pb_dict, all_ids)
            return pb_dict, all_ids
        except Exception as e:
            print(f"Error loading embeddings: {e}")
            print(f"Exception details: {str(e)}")
            return None, []

    def load_category_point_clouds(self, category):
        """Load all point clouds for a category from a single NPZ file"""
        if category not in self.NAME2CAT:
            return None

        cat_id = self.NAME2CAT[category]

        # Cache to avoid reloading
        if category in self.category_point_clouds:
            return self.category_point_clouds[category]

        # Check for different possible point cloud filenames
        possible_filenames = [
            f"{cat_id}.npz",
            f"point_clouds/{cat_id}_clouds.npz",
            f"/point_clouds/{cat_id}_clouds.npz",
            f"/data/point_clouds/{cat_id}_clouds.npz"  # For Hugging Face persistent storage
        ]

        # Find the first existing file
        pc_filename = None
        for filename in possible_filenames:
            if os.path.exists(filename):
                pc_filename = filename
                print(f"Found point cloud file: {pc_filename}")
                break

        if pc_filename is None:
            print(f"No point cloud file found for category {category}")
            return None

        # Load point clouds
        try:
            print(f"Loading point clouds from {pc_filename}...")
            pc_data_map = np.load(pc_filename, allow_pickle=False)
            pc_data = {'ids': pc_data_map['ids'], 'clouds': pc_data_map['clouds']}
            # Cache the loaded data
            self.category_point_clouds[category] = pc_data

            return pc_data
        except Exception as e:
            print(f"Error loading point clouds: {e}")
            return None

    def get_shape_preview(self, category, shape_idx):
        """Get a 3D point cloud visualization for a specific shape"""
        if shape_idx is None or shape_idx < 0:
            return None

        # Get shape ID
        pb_dict, all_ids = self.load_category_embeddings(category)
        if pb_dict is None or not all_ids or shape_idx >= len(all_ids):
            return None

        shape_id = all_ids[shape_idx]

        # Load all point clouds for this category
        pc_data = self.load_category_point_clouds(category)
        if pc_data is None:
            # Fallback to image if point clouds not available
            return self.get_shape_image_preview(category, shape_idx, shape_id)

        # Extract point cloud for this specific shape
        try:
            # Get the arrays from the npz file
            ids = pc_data['ids']
            clouds = pc_data['clouds']

            matching_indices = np.where(ids == shape_id)[0]

            # Check number of matches
            if len(matching_indices) == 0:
                # No matches found - log error and fall back to image
                print(f"Error: Shape ID {shape_id} not found in point cloud data")
                return self.get_shape_image_preview(category, shape_idx, shape_id)
            elif len(matching_indices) > 1:
                # Multiple matches found - unexpected data issue - we will get the first one
                print(f"Warning: Multiple matches ({len(matching_indices)}) found for Shape ID {shape_id}. Using first match.")

            # Get the corresponding point cloud
            matching_idx = matching_indices[0]
            points = clouds[matching_idx]

            # Create 3D visualization
            fig = self.get_shape_pointcloud_preview(points, title=f"Shape #{shape_idx}")
            return fig

        except Exception as e:
            print(f"Error extracting point cloud for {shape_id}: {e}")
            return self.get_shape_image_preview(category, shape_idx, shape_id)

    def get_shape_image_preview(self, category, shape_idx, shape_id):
        """Build a 2D image preview figure, used when no point cloud is available.

        Args:
            category: Category name (not used here; kept for a uniform call signature).
            shape_idx: Index of the shape, only used in the error title.
            shape_id: Shape ID used to fetch the rendered image.

        Returns:
            A plotly figure showing the rendered image as a layout image, or a
            figure with a "Preview not available" annotation if building the
            image figure fails.
        """
        try:
            preview_image = self.get_ulip_image(shape_id)
            preview_image = preview_image.resize((300, 300))

            # Convert PIL image to plotly figure
            fig = go.Figure()

            # Plotly takes layout images as URLs, so the PIL image is embedded
            # as a base64 PNG data URI
            import io
            import base64

            buf = io.BytesIO()
            preview_image.save(buf, format='PNG')
            img_str = base64.b64encode(buf.getvalue()).decode('utf-8')

            # Add image to figure
            fig.add_layout_image(
                dict(
                    source=f"data:image/png;base64,{img_str}",
                    xref="paper", yref="paper",
                    x=0, y=1,
                    sizex=1, sizey=1,
                    sizing="contain",
                    layer="below"
                )
            )

            fig.update_layout(
                title="Shape 2D Preview - 3D not available",
                xaxis=dict(showgrid=False, zeroline=False, visible=False, range=[0, 1]),
                yaxis=dict(showgrid=False, zeroline=False, visible=False, range=[0, 1], scaleanchor="x", scaleratio=1),
                margin=dict(l=0, r=0, b=0, t=0),
                plot_bgcolor='rgba(0,0,0,0)'  # Transparent background
            )

            return fig
        except Exception as e:
            print(f"Error loading preview for {shape_id}: {e}")
            # Create empty figure with error message
            fig = go.Figure()
            fig.update_layout(
                title=f"Error loading Shape #{shape_idx}",
                annotations=[dict(
                    text="Preview not available",
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.5, y=0.5,
                    # The keyword must be "font": plotly validates annotation
                    # properties, and the previous misspelling "ont" made this
                    # error fallback itself raise.
                    font=dict(size=16, color="#E53935"),  # Red error text
                    align="center"
                )],
                margin=dict(l=0, r=0, b=0, t=0, pad=0),
                plot_bgcolor='rgba(0,0,0,0)'  # Transparent background
            )
            return fig

    def get_shape_pointcloud_preview(self, points, title=None):
        """Create a clean 3D scatter figure of a point cloud with Y as the up axis.

        ``points`` is indexed as ``points[:, 0..2]`` and its columns are
        plotted as x, y and z unchanged. ``title`` is accepted but not
        displayed: the figure title is always cleared.
        """
        # A stride of 1 keeps every point (placeholder for subsampling)
        cloud = points[::1]

        # Fixed blue markers
        marker_style = dict(size=2.5, color='#4285F4', opacity=1)
        scatter = go.Scatter3d(
            x=cloud[:, 0],
            y=cloud[:, 1],
            z=cloud[:, 2],
            mode='markers',
            marker=marker_style
        )
        fig = go.Figure(data=[scatter])

        # Every axis element is hidden; each axis gets its own copy of the settings
        hidden_axis = dict(visible=False, showticklabels=False, showgrid=False, zeroline=False, showline=False,
                           showbackground=False)
        # Viewing angle, with Y pointing up
        camera = dict(
            eye=dict(x=-1.5, y=0.5, z=-1.5),
            up=dict(x=0, y=1, z=0),
            center=dict(x=0, y=0, z=0)
        )

        fig.update_layout(
            title=None,
            scene=dict(
                xaxis=dict(hidden_axis),
                yaxis=dict(hidden_axis),
                zaxis=dict(hidden_axis),
                aspectmode='data',  # Maintain data aspect ratio
                camera=camera
            ),
            # No margins around the plot
            margin=dict(l=0, r=0, b=0, t=0, pad=0),
            autosize=True,
            # Vertical modebar
            modebar=dict(
                bgcolor='white',
                color='#333',
                orientation='v',
                activecolor='#009688'
            ),
            paper_bgcolor='rgba(0,0,0,0)',  # Transparent background
        )

        return fig

    def get_ulip_image(self, guidance_shape_id, angle='036'):
        """Fetch the rendered image of a shape, resized to 512x512.

        The URL is built from the shape ID (underscores replaced by dashes)
        and the ``angle`` string. If loading fails, the error is printed and
        a plain gray 512x512 image is returned instead.
        """
        url_pattern = 'https://storage.googleapis.com/sfr-ulip-code-release-research/shapenet-55/only_rgb_depth_images/{}_r_{}_depth0001.png'
        render_url = url_pattern.format(guidance_shape_id.replace('_', '-'), angle)

        try:
            return load_image(render_url).resize((512, 512))
        except Exception as e:
            print(f"Error loading image: {e}")
            return Image.new('RGB', (512, 512), color='gray')

    def on_slider_change(self, shape_idx, category):
        """Update the preview when the slider changes"""
        max_idx = self.category_counts.get(category, 0) - 1

        # Get shape preview
        shape_preview = self.get_shape_preview(category, shape_idx)

        # Update counter text
        counter_text = f"Shape {shape_idx} of {max_idx}"

        return shape_preview, counter_text, shape_idx

    def prev_shape(self, current_idx):
        """Return the index one step back, never going below 0."""
        candidate = current_idx - 1
        return candidate if candidate > 0 else 0

    def next_shape(self, current_idx, category):
        """Go to next shape"""
        max_idx = self.category_counts.get(category, 0) - 1
        new_idx = min(max_idx, current_idx + 1)
        return new_idx

    def jump_to_start(self):
        """Return the index of the first shape, which is always 0."""
        first_idx = 0
        return first_idx

    def jump_to_end(self, category):
        """Jump to the last shape"""
        max_idx = self.category_counts.get(category, 0) - 1
        return max_idx

    def random_shape(self, category):
        """Select a random shape from the category"""
        max_idx = self.category_counts.get(category, 0) - 1
        if max_idx <= 0:
            return 0
        # Generate random index
        random_idx = random.randint(0, max_idx)
        return random_idx

    def random_prompt(self):
        """Select a random prompt from the predefined list"""

        return random.choice(self.PREDEFINED_PROMPTS)

    def on_category_change(self, category):
        """Reset the shape selection after a new category was chosen.

        Returns a slider re-ranged for the new category, the reset index (0),
        the preview of the first shape and the counter text, in that order.
        """
        # A new category always starts at its first shape
        first_idx = 0
        last_idx = self.category_counts.get(category, 0) - 1

        figure = self.get_shape_preview(category, first_idx)
        counter_text = f"Shape {first_idx} of {last_idx}"

        # The slider range depends on the category, so a fresh slider is returned
        slider_update = gr.Slider(
            minimum=0,
            maximum=last_idx,
            step=1,
            value=first_idx,
            label="Shape Index"
        )

        return slider_update, first_idx, figure, counter_text

    def get_guidance(self, test_prompt, category_name, guidance_emb):
        """Compute the shape-guidance offset for a prompt embedding.

        The prompt is encoded with the pipeline's text encoder, passed together
        with the shape embedding through the Shape2CLIP model, and the result is
        masked so that only the object-word position and the end-of-text
        position are non-zero.

        Args:
            test_prompt: Final prompt text (category already inserted).
            category_name: Category word whose token position receives guidance.
            guidance_emb: Shape embedding; indexed via ``.shape`` and converted
                with ``torch.FloatTensor`` (presumably a NumPy array - confirm).

        Returns:
            ``(fin_guidance, prompt_emb)``: the masked guidance tensor and the
            unmodified prompt embedding from the text encoder.
        """
        print(test_prompt, category_name)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device} in get_guidance")

        # Token ids of the prompt, padded to the tokenizer's max length
        prompt_tokens = torch.LongTensor(self.tokenizer.encode(test_prompt, padding='max_length')).to(device)

        # Encode the prompt; a detached copy of the last hidden state is kept
        with torch.no_grad():
            out = self.text_encoder(prompt_tokens.unsqueeze(0), output_attentions=True)
            prompt_emb = out.last_hidden_state.detach().clone()

        # Add leading dimensions: two for a 1-D embedding, one otherwise
        if len(guidance_emb.shape) == 1:
            guidance_emb = torch.FloatTensor(guidance_emb).unsqueeze(0).unsqueeze(0)
        else:
            guidance_emb = torch.FloatTensor(guidance_emb).unsqueeze(0)
        guidance_emb = guidance_emb.to(device)

        # Positions of token id 49407 (presumably the CLIP end-of-text id - confirm)
        eos_inds = torch.where(prompt_tokens.unsqueeze(0) == 49407)[1]
        obj_word = category_name
        # Second-to-last token of the encoded category
        # (presumably its last word token before end-of-text - confirm)
        obj_word_token = self.tokenizer.encode(obj_word)[-2]
        # Positions of that token inside the prompt
        chair_inds = torch.where(prompt_tokens.unsqueeze(0) == obj_word_token)[1]

        # Scaling applied at the end-of-text and object positions
        eos_strength = 0.8
        obj_strength = 1.0

        self.shape2clip_model.eval()
        with torch.no_grad():
            # The model runs in float32; its output is cast to half precision
            guided_prompt_emb_cond = self.shape2clip_model(prompt_emb.float(), guidance_emb[:,:,:].float()).half()
            guided_prompt_emb = guided_prompt_emb_cond.clone()

        # Masking: everything except the object and end-of-text positions is zeroed.
        # NOTE(review): chair_inds / eos_inds are used as slice bounds, which
        # appears to require exactly one match each - confirm behavior when the
        # object token is missing from, or repeated in, the prompt.
        guided_prompt_emb[:,:1] = 0
        guided_prompt_emb[:,:chair_inds] = 0
        guided_prompt_emb[:,chair_inds] *= obj_strength
        guided_prompt_emb[:,eos_inds+1:] = 0
        guided_prompt_emb[:,eos_inds] *= eos_strength
        guided_prompt_emb[:,chair_inds+1:eos_inds:] = 0
        fin_guidance = guided_prompt_emb

        return fin_guidance, prompt_emb

    @spaces.GPU(duration=120)
    def generate_images(self, prompt, category, selected_shape_idx, guidance_strength, seed):
        """Generate an unguided and a shape-guided image for the prompt.

        Args:
            prompt: User prompt; ``[CATEGORY]`` (any casing) is replaced by
                ``category``, otherwise the category is appended at the end.
            category: Selected category name.
            selected_shape_idx: Index of the guiding shape; clamped to the
                valid range, with None or negative values treated as 0.
            guidance_strength: Multiplier applied to the guidance offset.
            seed: Seed used for both generations so they share initial noise.

        Returns:
            ``(results, status)`` where ``results`` is a list of
            ``(image, caption)`` tuples (possibly empty or partial on errors)
            and ``status`` is the accumulated HTML status text.
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device} in generate_images")

        # Models are moved only when a GPU is present
        if device.type == "cuda":
            self.pipeline = self.pipeline.to(device)
            self.shape2clip_model = self.shape2clip_model.to(device)

        # Status text starts empty for every request
        status = ""

        # Insert the category at the placeholder (case-insensitive), or append it
        placeholder = re.compile(r'\[CATEGORY\]', re.IGNORECASE)
        if placeholder.search(prompt):
            final_prompt = placeholder.sub(category, prompt)
        else:
            final_prompt = f"{prompt} {category}"
            status += self.create_status_message(
                f"Warning! For better results, use [CATEGORY] in your prompt where you want '{category}' to appear, otherwise it is appended at the end of the prompt.",
                "info"
            )

        # Category names typed literally into the prompt are rejected,
        # with one error message per offending name
        typed_categories = [
            name for name in self.available_categories
            if re.search(r'\b' + re.escape(name) + r'\b', prompt, re.IGNORECASE)
        ]
        if typed_categories:
            for name in typed_categories:
                status += self.create_status_message(
                    f"Error! Your prompt contains '{name}'. Please remove it and use [CATEGORY] instead.",
                    "error"
                )
            return [], status

        # Embeddings of the category (cached after the first load)
        pb_dict, all_ids = self.load_category_embeddings(category)
        if pb_dict is None or not all_ids:
            status += self.create_status_message(
                f"Error! Unable to load embeddings for {category}",
                "error"
            )
            return [], status

        # Clamp the shape index into the valid range
        shape_idx = selected_shape_idx
        if shape_idx is None or shape_idx < 0:
            shape_idx = 0
        shape_idx = max(0, min(shape_idx, len(all_ids) - 1))
        guidance_shape_id = all_ids[shape_idx]

        generator = torch.Generator(device=device).manual_seed(seed)
        results = []

        # Step 1: base image without shape guidance
        try:
            with torch.no_grad():
                unguided = self.pipeline(
                    prompt=final_prompt,
                    num_inference_steps=50,
                    num_images_per_prompt=1,
                    generator=generator,
                    guidance_scale=7.5
                ).images
            results.append((unguided[0], "Unguided Result"))
        except Exception as e:
            print(f"Error generating base image: {e}")
            status += self.create_status_message(
                f"Error! Unable to generate base image: {str(e)}",
                "error"
            )
            return results, status

        # Step 2: guidance offset for the selected shape
        try:
            shape_emb = pb_dict[guidance_shape_id]
            out_guidance, prompt_emb = self.get_guidance(final_prompt, category, shape_emb)
        except Exception as e:
            print(f"Error generating guidance: {e}")
            status += self.create_status_message(
                f"Error! Unable to generate guidance: {str(e)}",
                "error"
            )
            return results, status

        # Step 3: guided image, re-seeded so it starts from the same noise
        try:
            generator = torch.Generator(device=device).manual_seed(seed)
            guided_embeds = prompt_emb + guidance_strength * out_guidance
            with torch.no_grad():
                guided = self.pipeline(
                    prompt_embeds=guided_embeds,
                    num_inference_steps=50,
                    num_images_per_prompt=1,
                    generator=generator,
                    guidance_scale=7.5
                ).images
            results.append((guided[0], f"Guided Result (λ = {guidance_strength})"))

            status += self.create_status_message(
                f"Success! Generated image guided by Shape #{shape_idx} from category '{category}'.",
                "success"
            )

            torch.cuda.empty_cache()
        except Exception as e:
            print(f"Error generating guided image: {e}")
            status += self.create_status_message(
                f"Error! Unable to generate guided image: {str(e)}",
                "error"
            )

        return results, status

    def create_status_message(self, content, type_="info"):
        # Define styles for different message types
        styles = {
            "info": {
                "bg": "rgba(33, 150, 243, 0.15)",
                "border": "#2196F3",
                "icon": "ℹ️",
                "title": "NOTE: "
            },
            "error": {
                "bg": "rgba(244, 67, 54, 0.15)",
                "border": "#F44336",
                "icon": "❌",
                "title": "ERROR: "
            },
            "success": {
                "bg": "rgba(76, 175, 80, 0.15)",
                "border": "#4CAF50",
                "icon": "✅",
                "title": "SUCCESS: "
            },
            "waiting": {
                "bg": "rgba(255, 193, 7, 1)",
                "border": "#FFC107",
                "icon": "⏳",
                "title": "PROCESSING: "
            }
        }

        style = styles.get(type_, styles["info"])

        font_weight = "bold" if type_ == "waiting" else "normal"
        animation_style = "animation: pulse 1.5s infinite;" if type_ == "waiting" else ""

        return f"""
        <div style='
            padding: 12px; 
            background-color: {style["bg"]}; 
            border-left: 5px solid {style["border"]}; 
            margin-bottom: 12px;
            border-radius: 4px;
            display: flex;
            align-items: flex-start;
            gap: 8px;
            box-shadow: 0 1px 3px rgba(0,0,0,0.12);
            font-weight: {font_weight};
            {animation_style}
        '>
            <style>
            @keyframes pulse {{
                0%, 100% {{ opacity: 1; }}
                50% {{ opacity: 0.7; }}
            }}
            </style>
            <div style='font-size: 18px; line-height: 1.2;'>{style["icon"]}</div>
            <div>{content}</div>
        </div>
        """

    def on_demo_load(self):
        """Function to ensure initial image is loaded when demo starts"""
        default_category = "chair" if "chair" in self.available_categories else self.available_categories[0]
        initial_img = self.get_shape_preview(default_category, 0)
        return initial_img

    def create_ui(self):
        """Build the Gradio Blocks interface for the demo and wire its events.

        The page is laid out top to bottom as: title/authors header, external
        link buttons, an instructions accordion, prompt design (category +
        text prompt), generation settings (guidance strength + seed), a
        two-column row with the shape selector and the results gallery, the
        generate button, a status line and an "about" section.

        Returns:
            The constructed ``gr.Blocks`` object. It is not launched here; the
            caller is expected to call ``.launch()`` on it.
        """
        # Ensure chair is in available categories, otherwise use the first available
        default_category = "chair" if "chair" in self.available_categories else self.available_categories[0]

        # Index of the last shape in the default category. Clamped at 0 so the
        # slider never gets a maximum below its minimum when the category has
        # no recorded shape count.
        last_shape_idx = max(self.category_counts.get(default_category, 0) - 1, 0)

        with gr.Blocks(title="ShapeWords: Guiding Text-to-Image Synthesis with 3D Shape-Aware Prompts",
                       theme=gr.themes.Soft(
                           primary_hue="orange",
                           secondary_hue="blue",
                           font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
                           font_mono=[gr.themes.GoogleFont("IBM Plex Mono"), "ui-monospace", "Consolas", "monospace"],
                       ),
                       css="""
                      /* Base styles */
                      .container { max-width: 1400px; margin: 0 auto; }

                      /* Title headers */
                      .title { text-align: center; font-size: 26px; font-weight: 600; margin-bottom: 3px; }
                      .subtitle { text-align: center; font-size: 16px; margin-bottom: 3px; }
                      .authors { text-align: center; font-size: 15px; margin-bottom: 3px; }
                      .affiliations { text-align: center; font-size: 13px; margin-bottom: 3px; }
                      
                      /* Instructions Accordion */
                      button.instructions-accordion > span,
                      .instructions-accordion button > span {
                          font-size: 17px !important;
                          font-weight: 600 !important;
                      }
                        
                      .instructions-accordion + div p,
                      .instructions-accordion + div li,
                      .instructions-text p,
                      .instructions-text li {
                          font-size: 14px !important;
                      }
                        
                      /* Section Headers */
                      .step-header,.settings-header { 
                          font-size: 18px; 
                          font-weight: 600; 
                          margin-top: 5px;
                          margin-bottom: 5px;
                      }

                      .sub-header {
                          margin-top: 5px;
                          margin-bottom: 5px;
                          padding-left: 5px;
                      }

                      /* Buttons for project page, paper, code etc*/
                      .buttons-container { margin: 0 auto 10px; }
                      .buttons-row { display: flex; justify-content: center; gap: 10px; flex-wrap: nowrap; }
                      .nav-button {
                          display: inline-block;
                          padding: 6px 12px;
                          background-color: #363636;
                          color: white !important;
                          text-decoration: none;
                          border-radius: 20px;
                          font-weight: 500;
                          font-size: 14px;
                          transition: background-color 0.2s;
                          text-align: center;
                          white-space: nowrap;
                      }
                      .nav-button:hover { background-color: #505050; }
                      .nav-button.disabled { 
                          opacity: 0.6; 
                          cursor: not-allowed;
                      }

                      /* Prompt design section elements */
                      .category-dropdown .wrap { font-size: 16px; }
                      .prompt-input { flex-grow: 1; }
                      .prompt-button { 
                            align-self: center;  /* Vertical centering */
                            margin-left: auto;   /* Horizontal centering */
                            margin-right: auto;
                            display: block;      /* Makes margins work for centering */
                      }
                      /* Shape selection section elements */
                      .shape-navigation { 
                          display: flex; 
                          justify-content: center; 
                          align-items: center; 
                          margin: 10px auto;
                          gap: 15px;
                          max-width: 320px;
                      }
                      .shape-navigation button { 
                          min-width: 40px; 
                          max-width: 60px; 
                          width: auto; 
                          padding: 6px 10px; 
                      }
                      .nav-icon-btn { font-size: 18px; }
                      /* Generate button */
                      .generate-button { 
                          font-size: 18px !important; 
                          padding: 12px !important; 
                          margin: 15px 0 !important; 
                          background: linear-gradient(135deg, #f97316, #fb923c) !important;
                      }
                      /* Results section elements */
                      .results-gallery { min-height: 100px; max-height: 500px; display: flex; align-items: center; justify-content: center; }
                      .results-gallery .grid-container { display: flex; align-items: center; }
                      /* About section elements */
                      .about-section { font-size: 16px; margin-top: 40px; padding: 20px; border-top: 1px solid rgba(128, 128, 128, 0.2); }
                    
                      /* Responsive adjustments for mobile mode*/
                      @media (max-width: 768px) {
                          .shape-navigation { 
                              max-width: 100%;
                              gap: 5px;
                          }
                          .shape-navigation button { 
                              min-width: 36px;
                              padding: 6px 0;
                              font-size: 16px;
                          }
                          .buttons-row {
                              gap: 5px;
                          }
                          .nav-button {
                              padding: 5px 8px;
                              font-size: 13px;
                          }
                          .results-gallery {
                              max-height: 320px;
                          } 
                      }

                      /* Dark mode overrides */
                      @media (prefers-color-scheme: dark) {
                          .nav-button {
                              background-color: #505050;
                          }
                          .nav-button:hover {
                              background-color: #666666;
                          }  
                      }
                      """) as demo:
            # Header with title and links
            gr.Markdown("# ShapeWords: Guiding Text-to-Image Synthesis with 3D Shape-Aware Prompts",
                        elem_classes="title")
            gr.Markdown("### CVPR 2025", elem_classes="subtitle")
            gr.Markdown(
                "Dmitry Petrov<sup>1</sup>, Pradyumn Goyal<sup>1</sup>, Divyansh Shivashok<sup>1</sup>, Yuanming Tao<sup>1</sup>, Melinos Averkiou<sup>2,3</sup>, Evangelos Kalogerakis<sup>1,2,4</sup>",
                elem_classes="authors")
            gr.Markdown(
                "<sup>1</sup>UMass Amherst    <sup>2</sup>CYENS CoE    <sup>3</sup>University of Cyprus    <sup>4</sup>TU Crete",
                elem_classes="affiliations")

            # Navigation buttons
            with gr.Row():
                with gr.Column(scale=3):
                    pass  # Empty space for alignment
                with gr.Column(scale=2, elem_classes="buttons-container"):
                    # The "Code" and "Data" entries are placeholders: they carry no
                    # href/target so that clicking them does nothing (an href="#"
                    # with target="_blank" would open a duplicate tab of this page).
                    gr.HTML("""
                    <div class="buttons-row">
                        <a href="https://arxiv.org/abs/2412.02912" target="_blank" class="nav-button">
                            arXiv
                        </a>
                        <a href="https://lodurality.github.io/shapewords/" target="_blank" class="nav-button">
                            Project Page
                        </a>
                        <a role="link" aria-disabled="true" class="nav-button disabled">
                            Code
                        </a>
                        <a role="link" aria-disabled="true" class="nav-button disabled">
                            Data
                        </a>
                    </div>
                    """)
                with gr.Column(scale=3):
                    pass  # Empty space for alignment

            # Add instructions

            with gr.Accordion("📋 Instructions", open=True, elem_classes="instructions-accordion"):
                gr.Markdown("""
                1️⃣ Select a shape category from the dropdown menu -- overall 55 categories. We recommend trying chair (default), car, lamp and bottle categories.

                2️⃣ Create a text prompt using **[CATEGORY]** as a placeholder or use **"Random prompt"** button to select from a small set of pre-defined prompts

                3️⃣ Adjust **guidance strength** to control shape influence. Use the default 0.9 value for best balance between prompt and shape adherence. Value of 0.0 corresponds to unguided result that is based just on input prompt.

                4️⃣ (optional) Choose **random seed**. For a fixed combination of input prompt and random seed, unguided image will always be the same.
                
                5️⃣ Choose **guidance 3D shape** using the slider, navigation or random shape buttons. Shapes come from ShapeNet dataset (~55K shapes across all categories)

                6️⃣ Click **Generate Images** button at the bottom to create images that follow both your text prompt and the selected 3D shape geometry
                """, elem_classes="instructions-text")

            # Hidden field to store selected shape index
            selected_shape_idx = gr.Number(value=0, visible=False)

            # Prompt Design (full width)
            with gr.Group():
                gr.Markdown("### 📝 Prompt Design", elem_classes="step-header")

                with gr.Row():
                    category = gr.Dropdown(
                        label="1️⃣ Shape Category",
                        choices=self.available_categories,
                        value=default_category,
                        container=True,
                        elem_classes="category-dropdown",
                        scale=2
                    )

                    prompt = gr.Textbox(
                        label="2️⃣ Text Prompt - Use [CATEGORY] as a placeholder, e.g. 'a [CATEGORY] under a tree'",
                        placeholder="an aquarelle drawing of a [CATEGORY]",
                        value="an aquarelle drawing of a [CATEGORY]",
                        lines=1,
                        scale=5,
                        elem_classes="prompt-input"
                    )

                    random_prompt_btn = gr.Button("🎲 Random\nPrompt",
                                                  size="lg",
                                                  scale=1,
                                                  elem_classes="prompt-button")


            # Generation Settings (full width)
            with gr.Group():
                gr.Markdown("### ⚙️ Generation Settings", elem_classes="settings-header")

                with gr.Row():
                    with gr.Column():
                        guidance_strength = gr.Slider(
                            minimum=0.0, maximum=1.0, step=0.1, value=0.9,
                            label="3️⃣ Guidance Strength (λ) - Higher λ = stronger shape adherence"
                        )
                    with gr.Column():
                        seed = gr.Slider(
                            minimum=0, maximum=10000, step=1, value=42,
                            label="4️⃣ Random Seed - (optional) Change for different variations"
                        )

            # Middle section - Shape Selection and Results side by side
            with gr.Row(equal_height=True):
                # Left column - Shape Selection
                with gr.Column():
                    with gr.Group():
                        gr.Markdown("### 🔍 Shape Selection", elem_classes="step-header")

                        shape_slider = gr.Slider(
                            minimum=0,
                            maximum=last_shape_idx,
                            step=1,
                            value=0,
                            label="5️⃣ Shape Index - Choose a 3D shape to guide image generation",
                            interactive=True
                        )

                        shape_counter = gr.Markdown(f"Shape 0 of {last_shape_idx}", elem_classes="sub-header")

                        current_shape_plot = gr.Plot(show_label=False)

                        # Navigation buttons - Icons only for better mobile compatibility
                        with gr.Row(elem_classes="shape-navigation"):
                            jump_start_btn = gr.Button("⏮️", size="sm", elem_classes="nav-icon-btn")
                            prev_shape_btn = gr.Button("◀️", size="sm", elem_classes="nav-icon-btn")
                            random_btn = gr.Button("🎲", size="sm", variant="secondary", elem_classes="nav-icon-btn")
                            next_shape_btn = gr.Button("▶️", size="sm", elem_classes="nav-icon-btn")
                            jump_end_btn = gr.Button("⏭️", size="sm", elem_classes="nav-icon-btn")

                # Right column - Results
                with gr.Column():
                    with gr.Group():
                        gr.Markdown("### 🖼️ Generated Results Preview", elem_classes="step-header")
                        gallery = gr.Gallery(
                            label="Results",
                            show_label=False,
                            elem_id="results_gallery",
                            columns=2,
                            elem_classes="results-gallery"
                        )

            # Generate button (full width)
            with gr.Row():
                run_button = gr.Button(" 6️⃣ ✨ Generate Images guided by Selected Shape", variant="primary", size="lg",
                                           elem_classes="generate-button")

            # Status message (full width)
            with gr.Row():
                status_text = gr.HTML("", elem_classes="status-message")

            # About section at the bottom of the page
            with gr.Group(elem_classes="about-section"):
                gr.Markdown("""
                ### About ShapeWords

                ShapeWords incorporates target 3D shape information with text prompts to guide image synthesis.

                ### How It Works
                
                1. Select a shape category from the dropdown menu -- overall 55 categories. We recommend trying chair (default), car, lamp and bottle categories.
                2. Create a text prompt using **[CATEGORY]** as a placeholder or use **"Random prompt"** button to select from a small set of pre-defined prompts
                3. Adjust **guidance strength** to control shape influence. Use the default 0.9 value for best balance between prompt and shape adherence. Value of 0.0 corresponds to unguided result that is based just on input prompt.
                4. (optional) Choose **random seed**. For a fixed combination of input prompt and random seed, unguided image will always be the same.
                5. Choose **guidance 3D shape** using the slider, navigation or random shape buttons. Shapes come from ShapeNet dataset (~55K shapes across all categories)
                6. Click **Generate Images** button at the bottom to create images that follow both your text prompt and the selected 3D shape geometry
                
                ### Citation
                ```
                @misc{petrov2024shapewords,
                      title={ShapeWords: Guiding Text-to-Image Synthesis with 3D Shape-Aware Prompts}, 
                      author={Dmitry Petrov and Pradyumn Goyal and Divyansh Shivashok and Yuanming Tao and Melinos Averkiou and Evangelos Kalogerakis},
                      year={2024},
                      eprint={2412.02912},
                      archivePrefix={arXiv},
                      primaryClass={cs.CV},
                      url={https://arxiv.org/abs/2412.02912}, 
                }
                ```
                """)

            # Make sure the initial image is loaded when the demo starts
            demo.load(
                fn=self.on_demo_load,
                inputs=None,
                outputs=[current_shape_plot]
            )

            # The slider is the single entry point for shape changes: every
            # navigation button below only writes a new value into the slider,
            # and this handler then refreshes the preview, the counter text and
            # the hidden selected-index field.
            shape_slider.change(
                fn=self.on_slider_change,
                inputs=[shape_slider, category],
                outputs=[current_shape_plot, shape_counter, selected_shape_idx]
            )

            # Previous shape button
            prev_shape_btn.click(
                fn=self.prev_shape,
                inputs=[selected_shape_idx],
                outputs=[shape_slider]
            )

            # Next shape button
            next_shape_btn.click(
                fn=self.next_shape,
                inputs=[selected_shape_idx, category],
                outputs=[shape_slider]
            )

            # Jump to start button
            jump_start_btn.click(
                fn=self.jump_to_start,
                inputs=None,
                outputs=[shape_slider]
            )

            # Jump to end button
            jump_end_btn.click(
                fn=self.jump_to_end,
                inputs=[category],
                outputs=[shape_slider]
            )

            # Random shape button
            random_btn.click(
                fn=self.random_shape,
                inputs=[category],
                outputs=[shape_slider]
            )

            # Connect the random prompt button
            random_prompt_btn.click(
                fn=self.random_prompt,
                inputs=[],
                outputs=[prompt]
            )

            # Update the UI when category changes
            category.change(
                fn=self.on_category_change,
                inputs=[category],
                outputs=[shape_slider, selected_shape_idx, current_shape_plot, shape_counter]
            )

            # Show the "generating" status first, then run the generation.
            # Both steps write to status_text, so they are chained with .then()
            # instead of being registered as two independent click listeners:
            # independent listeners run concurrently and their completion order
            # is not guaranteed, which could leave the waiting message on screen
            # after generation had already reported its result.
            run_button.click(
                fn=lambda: self.create_status_message("Generating images...", "waiting"),
                inputs=None,
                outputs=[status_text]
            ).then(
                fn=self.generate_images,
                inputs=[prompt, category, selected_shape_idx, guidance_strength, seed],
                outputs=[gallery, status_text]
            )

        return demo

# Main function and entry point
def main():
    """Parse the command line, build the demo UI and launch it.

    The only recognised option is ``--share``, which is forwarded to Gradio's
    ``launch`` to request a public link.
    """
    cli = argparse.ArgumentParser(description="ShapeWords Gradio Demo")
    cli.add_argument('--share', action='store_true', help='Create a public link')
    share_publicly = cli.parse_args().share

    # Construct the application, build its Blocks interface, then serve it
    ShapeWordsDemo().create_ui().launch(share=share_publicly)


# Run the demo only when executed as a script, not when imported
if __name__ == "__main__":
    main()