## 1. Install Required Packages

Install all necessary Python packages for the emotion classification project. This includes libraries for experiment tracking (wandb), model hosting (huggingface_hub), deep learning (transformers, torch), and data processing (scikit-learn, pandas, numpy).

In [None]:
# Notebook shell magic: quietly (-q) install the tracking, hub, modelling and data packages used below.
!pip install -q wandb huggingface_hub transformers torch scikit-learn pandas numpy

## 2. API Key Setup

Configure authentication for Weights & Biases and HuggingFace. W&B will track our experiments and metrics during training, while HuggingFace allows us to upload and share the trained model. The API keys are read from Kaggle Secrets, so add `wandb_api_key` and `hf_api_key` to the notebook's secrets before running this cell.

In [None]:
import wandb
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

# Both API keys are read from Kaggle Secrets so nothing is hard-coded in the notebook.
user_secrets = UserSecretsClient()

wb_secret = user_secrets.get_secret("wandb_api_key")
hf_secret = user_secrets.get_secret("hf_api_key")

wandb.login(key=wb_secret)
# Pass the fetched token explicitly: a bare login() ignores hf_secret and
# falls back to an interactive prompt, which stalls non-interactive runs.
login(token=hf_secret)

## 3. Imports & Setup

Import all required libraries for the project. This includes data manipulation (pandas, numpy), deep learning frameworks (torch), model utilities (transformers), evaluation metrics (sklearn), and system utilities.

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, classification_report
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    get_linear_schedule_with_warmup,
    AutoConfig
)
from torch.optim import AdamW
from torch.cuda.amp import autocast, GradScaler
from huggingface_hub import HfApi, create_repo
import gc
import warnings
import os
from datetime import datetime

warnings.filterwarnings("ignore")

## 4. Configuration

Define all hyperparameters and settings for the training process. This includes model selection, training parameters (learning rate, batch size, epochs), file paths, and experiment tracking configuration. All settings are centralized in a Config class for easy modification.

In [None]:
class Config:
    """Central container for every setting the notebook reads.

    All values are plain class attributes, so they can be overridden on the
    class or on an instance before training starts.
    """

    # --- Experiment identity -------------------------------------------
    PROJECT_NAME = "emotion-classification-dl"
    # The timestamp suffix is computed once, when the class body executes.
    EXPERIMENT_NAME = "deberta-v3-large-" + datetime.now().strftime("%Y%m%d-%H%M%S")

    # --- Hugging Face Hub ----------------------------------------------
    HF_USERNAME = None  # filled in after login, once the account name is known
    HF_MODEL_NAME = "emotion-classifier-deberta-v3"

    # --- Model and optimisation ----------------------------------------
    SEED = 42
    LABELS = ["anger", "fear", "joy", "sadness", "surprise"]
    MODEL_NAME = "microsoft/deberta-v3-large"
    MAX_LEN = 128          # tokens per example after truncation/padding
    BATCH_SIZE = 16
    EPOCHS = 4
    LR = 1.5e-5
    WEIGHT_DECAY = 0.01
    WARMUP_RATIO = 0.1     # fraction of total steps spent warming up
    N_FOLDS = 5

    # --- Input files ---------------------------------------------------
    TRAIN_CSV = "/kaggle/input/2025-sep-dl-gen-ai-project/train.csv"
    TEST_CSV = "/kaggle/input/2025-sep-dl-gen-ai-project/test.csv"

    # --- Weights & Biases ----------------------------------------------
    WANDB_PROJECT = "emotion-classification-dl"
    LOG_EVERY_N_STEPS = 50

CONFIG = Config()

# Get HuggingFace username
# Best-effort lookup: a failure leaves CONFIG.HF_USERNAME as None so it can
# still be set by hand. Only Exception is caught, so Ctrl-C and interpreter
# shutdown are no longer swallowed, and the reason for the failure is shown.
try:
    from huggingface_hub import whoami
    CONFIG.HF_USERNAME = whoami()["name"]
    print(f"HuggingFace username: {CONFIG.HF_USERNAME}")
except Exception as exc:
    print("Could not fetch HuggingFace username. Please set CONFIG.HF_USERNAME manually.")
    print(f"  Reason: {exc!r}")

## 5. Seed & Device Setup

Set random seeds for reproducibility across all libraries (numpy, torch, CUDA). This makes training runs as repeatable as possible, although some GPU operations can still introduce small run-to-run differences. Also configure the device (GPU/CPU) for training.

In [None]:
def set_seed(seed=CONFIG.SEED):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed; defaults to ``CONFIG.SEED`` as it was when this
            function was defined.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Python reads PYTHONHASHSEED at interpreter start-up, so this only
    # affects interpreters launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Trade some speed for repeatable cuDNN kernel selection.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Seed everything before any model or data loader is created.
set_seed()

# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if device.type == "cuda":
    # Report the first GPU's name and total memory in GB.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

## 6. Utility Functions

Define helper functions used throughout the training process:
- ensure_text_column: Automatically detects and renames text columns in the dataset
- tune_thresholds: Optimizes classification thresholds for each emotion label to maximize F1 score
- get_optimizer_params: Configures differential weight decay for different parameter groups
- compute_metrics: Calculates comprehensive evaluation metrics including per-label and overall F1 scores

In [None]:
def ensure_text_column(df: pd.DataFrame) -> pd.DataFrame:
    """Return *df* with its text field available under the name 'text'.

    If 'text' is already a column the frame is returned unchanged. Otherwise
    the first of 'comment_text', 'sentence', 'content', 'review' that exists
    is renamed to 'text' in a new frame.

    Raises:
        ValueError: If none of the recognised column names is present.
    """
    columns = df.columns
    if "text" in columns:
        return df

    # Candidates are checked in priority order; the first hit wins.
    alias = next(
        (name for name in ("comment_text", "sentence", "content", "review") if name in columns),
        None,
    )
    if alias is None:
        raise ValueError("No text column found. Add/rename your text column to 'text'.")
    return df.rename(columns={alias: "text"})

def tune_thresholds(y_true: np.ndarray, y_prob: np.ndarray) -> np.ndarray:
    """Pick, per label column, the cut-off that maximises binary F1.

    Args:
        y_true: 2-D array of ground-truth indicators, one column per label.
        y_prob: 2-D array of scores with the same column layout as *y_true*.

    Returns:
        float32 array with one threshold per column, chosen from 17 evenly
        spaced candidates between 0.1 and 0.9.
    """
    n_labels = y_true.shape[1]
    candidates = np.linspace(0.1, 0.9, 17)
    thresholds = np.zeros(n_labels, dtype=np.float32)

    for col in range(n_labels):
        truth, scores = y_true[:, col], y_prob[:, col]
        f1_per_cut = [
            f1_score(truth, (scores >= cut).astype(int), zero_division=0)
            for cut in candidates
        ]
        # argmax returns the first maximum, i.e. the lowest cut-off among ties.
        thresholds[col] = candidates[int(np.argmax(f1_per_cut))]

    return thresholds

def get_optimizer_params(model, lr, weight_decay):
    """Split the model's parameters into decayed and non-decayed groups.

    Parameters whose name contains "bias", "LayerNorm.bias" or
    "LayerNorm.weight" go into the second group with weight decay 0.0; all
    others go into the first group with *weight_decay*.

    Args:
        model: Object exposing ``named_parameters()``.
        lr: Accepted for interface compatibility; not used in this function.
        weight_decay: Decay applied to the first group.

    Returns:
        A two-element list of parameter-group dicts for an optimizer.
    """
    exempt_markers = ("bias", "LayerNorm.bias", "LayerNorm.weight")
    decayed, exempt = [], []

    # One pass over the parameters, preserving their original order per group.
    for name, param in model.named_parameters():
        is_exempt = any(marker in name for marker in exempt_markers)
        (exempt if is_exempt else decayed).append(param)

    return [
        {"params": decayed, "weight_decay": weight_decay},
        {"params": exempt, "weight_decay": 0.0},
    ]

def compute_metrics(y_true, y_pred):
    """Return overall and per-label F1 scores for multi-label predictions.

    Args:
        y_true: 2-D indicator array of ground truth, columns ordered like
            ``CONFIG.LABELS``.
        y_pred: 2-D indicator array of predictions with the same layout.

    Returns:
        Dict with 'macro_f1', 'micro_f1', 'weighted_f1' followed by one
        '<label>_f1' entry per label in ``CONFIG.LABELS``.
    """
    # Aggregate scores across all labels.
    metrics = {
        'macro_f1': f1_score(y_true, y_pred, average='macro', zero_division=0),
        'micro_f1': f1_score(y_true, y_pred, average='micro', zero_division=0),
        'weighted_f1': f1_score(y_true, y_pred, average='weighted', zero_division=0),
    }

    # Binary F1 for each label column on its own.
    metrics.update(
        (f'{label}_f1', f1_score(y_true[:, col], y_pred[:, col], zero_division=0))
        for col, label in enumerate(CONFIG.LABELS)
    )
    return metrics

## 7. Dataset Class

Implement a PyTorch Dataset class for emotion classification. This class handles tokenization of text inputs using the transformer tokenizer, applies padding and truncation to a fixed length, and prepares the data in the format expected by the model.

In [None]:
class EmotionDS(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one text per item on demand.

    Args:
        df: DataFrame with a 'text' column and, unless *is_test* is true,
            one column per entry of ``CONFIG.LABELS``.
        tokenizer: Callable accepting ``(text, truncation=, padding=,
            max_length=, return_tensors=)`` and returning a mapping of
            tensors with a leading batch dimension of 1.
        max_len: Length every example is truncated/padded to.
        is_test: When true, no labels are read or returned.
    """

    def __init__(self, df, tokenizer, max_len, is_test=False):
        # Missing cells arrive from pandas as NaN/None, which are not strings;
        # coerce them to "" so the tokenizer always receives a str.
        self.texts = df["text"].fillna("").astype(str).tolist()
        self.is_test = is_test
        if not is_test:
            self.labels = df[CONFIG.LABELS].values.astype(np.float32)
        self.tok = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, i):
        """Return the tokenized example *i*, plus 'labels' when not in test mode."""
        enc = self.tok(
            self.texts[i],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt",
        )
        # Drop the batch dimension added by return_tensors="pt" so the
        # DataLoader can stack items itself.
        item = {k: v.squeeze(0) for k, v in enc.items()}
        if not self.is_test:
            item["labels"] = torch.tensor(self.labels[i])
        return item

## 8. Training & Validation Functions

Implement the core training and validation loops:
- train_one_epoch: Performs one complete pass through the training data with gradient updates, mixed precision training, gradient clipping, and logging to W&B
- validate: Evaluates the model on validation data without gradient computation, returning loss and predictions for metric calculation

In [None]:
def train_one_epoch(model, loader, optimizer, scheduler, scaler, criterion, epoch, fold):
    """Run one training pass over *loader* and log progress to W&B.

    Each step runs the forward pass under autocast, back-propagates the
    scaled loss, clips the gradient norm to 1.0 and steps the optimizer
    through the GradScaler.

    Args:
        model: Model called with ``input_ids`` and ``attention_mask`` whose
            output exposes ``.logits``.
        loader: Iterable of dict batches containing 'input_ids',
            'attention_mask' and 'labels' tensors.
        optimizer: Optimizer updating the model's parameters.
        scheduler: Learning-rate scheduler stepped once per applied update.
        scaler: GradScaler used for loss scaling.
        criterion: Loss called as ``criterion(logits, labels)``.
        epoch: Zero-based epoch index, used for logging.
        fold: Fold index, used as a prefix for the logged keys.

    Returns:
        Mean of the per-step loss values.
    """
    model.train()
    losses = []

    for step, batch in enumerate(loader):
        batch = {k: v.to(device, non_blocking=True) for k, v in batch.items()}
        optimizer.zero_grad(set_to_none=True)

        with autocast(enabled=True):
            out = model(input_ids=batch["input_ids"], attention_mask=batch["attention_mask"])
            loss = criterion(out.logits, batch["labels"])

        scaler.scale(loss).backward()
        # Unscale before clipping so the 1.0 limit applies to the true gradients.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # GradScaler skips optimizer.step() when it finds inf/NaN gradients and
        # then lowers its scale. Advancing the scheduler on such a step would
        # consume warm-up/decay steps without a matching weight update.
        scale_before = scaler.get_scale()
        scaler.step(optimizer)
        scaler.update()
        if scaler.get_scale() >= scale_before:
            scheduler.step()

        # Read the loss once; every .item() call forces a device sync.
        loss_value = loss.item()
        losses.append(loss_value)

        # Log to W&B
        if step % CONFIG.LOG_EVERY_N_STEPS == 0:
            wandb.log({
                f"fold_{fold}/train_loss_step": loss_value,
                f"fold_{fold}/learning_rate": scheduler.get_last_lr()[0],
                f"fold_{fold}/epoch": epoch,
                "step": step + epoch * len(loader)
            })

    return np.mean(losses)

def validate(model, loader, criterion):
    """Evaluate *model* on *loader* without computing gradients.

    Args:
        model: Model called with ``input_ids`` and ``attention_mask`` whose
            output exposes ``.logits``.
        loader: Iterable of dict batches containing 'input_ids',
            'attention_mask' and 'labels' tensors.
        criterion: Loss called as ``criterion(logits, labels)``.

    Returns:
        Tuple ``(mean_loss, probabilities, targets)`` where the last two are
        the row-stacked sigmoid outputs and labels of every batch.
    """
    model.eval()
    batch_losses, prob_chunks, target_chunks = [], [], []

    with torch.no_grad():
        for raw_batch in loader:
            batch = {
                name: tensor.to(device, non_blocking=True)
                for name, tensor in raw_batch.items()
            }
            with autocast(enabled=True):
                logits = model(
                    input_ids=batch["input_ids"],
                    attention_mask=batch["attention_mask"],
                ).logits
                loss = criterion(logits, batch["labels"])

            batch_losses.append(loss.item())
            # Cast to float32 after the sigmoid, before leaving the device.
            probs = torch.sigmoid(logits).float()
            prob_chunks.append(probs.cpu().numpy())
            target_chunks.append(batch["labels"].cpu().numpy())

    mean_loss = np.mean(batch_losses)
    return mean_loss, np.vstack(prob_chunks), np.vstack(target_chunks)

## 9. Main Training Loop with K-Fold Cross-Validation

Execute the complete training pipeline using stratified K-fold cross-validation. For each fold, the function:
1. Splits the data into training and validation sets
2. Initializes the model, optimizer, and learning rate scheduler
3. Trains for the specified number of epochs
4. Tracks the best model based on validation F1 score
5. Saves model checkpoints and logs metrics to W&B
6. Accumulates out-of-fold predictions for final evaluation

In [None]:
def _snapshot_state_dict(model):
    """Return an independent CPU copy of ``model.state_dict()``."""
    # state_dict() hands back references to the live parameter tensors, so a
    # real copy is needed to freeze the weights as they are right now.
    return {
        name: tensor.detach().to("cpu", copy=True)
        for name, tensor in model.state_dict().items()
    }


def run_training():
    """Train one model per fold with stratified K-fold CV and log to W&B.

    For every fold a fresh model is fine-tuned for ``CONFIG.EPOCHS`` epochs.
    The weights and validation probabilities of the epoch with the highest
    macro F1 (at a 0.5 threshold) are kept; the weights are saved to
    ``model_fold_<fold>.pth`` and the probabilities fill the out-of-fold
    matrix.

    Returns:
        Tuple ``(oof_preds, y_true, fold_scores)``: out-of-fold probabilities
        with one row per training sample, the label matrix from the training
        CSV, and the best macro F1 of each fold. Returns
        ``(None, None, None)`` when the training CSV does not exist.
    """
    if not os.path.exists(CONFIG.TRAIN_CSV):
        print("Train CSV not found. Please check the path.")
        return None, None, None

    # Initialize W&B
    wandb.init(
        project=CONFIG.WANDB_PROJECT,
        name=CONFIG.EXPERIMENT_NAME,
        config={
            "model": CONFIG.MODEL_NAME,
            "max_len": CONFIG.MAX_LEN,
            "batch_size": CONFIG.BATCH_SIZE,
            "epochs": CONFIG.EPOCHS,
            "learning_rate": CONFIG.LR,
            "weight_decay": CONFIG.WEIGHT_DECAY,
            "warmup_ratio": CONFIG.WARMUP_RATIO,
            "n_folds": CONFIG.N_FOLDS,
            "seed": CONFIG.SEED,
        }
    )

    print("Loading data...")
    df = pd.read_csv(CONFIG.TRAIN_CSV)
    df = ensure_text_column(df)

    print(f"Data loaded: {len(df)} samples")
    print(f"Label distribution:")
    for label in CONFIG.LABELS:
        print(f"  {label}: {df[label].sum()} ({df[label].mean()*100:.1f}%)")

    # Create Stratified Folds
    # StratifiedKFold needs a single target, so each row's label values are
    # concatenated into one string and that combination is stratified on.
    skf = StratifiedKFold(n_splits=CONFIG.N_FOLDS, shuffle=True, random_state=CONFIG.SEED)
    y_str = df[CONFIG.LABELS].astype(str).agg("".join, axis=1)

    oof_preds = np.zeros((len(df), len(CONFIG.LABELS)))
    fold_scores = []

    tokenizer = AutoTokenizer.from_pretrained(CONFIG.MODEL_NAME)

    for fold, (train_idx, val_idx) in enumerate(skf.split(df, y_str)):
        print(f"\n{'='*60}")
        print(f"FOLD {fold+1}/{CONFIG.N_FOLDS}")
        print(f"{'='*60}")

        df_tr = df.iloc[train_idx].reset_index(drop=True)
        df_va = df.iloc[val_idx].reset_index(drop=True)

        ds_tr = EmotionDS(df_tr, tokenizer, CONFIG.MAX_LEN)
        ds_va = EmotionDS(df_va, tokenizer, CONFIG.MAX_LEN)

        dl_tr = torch.utils.data.DataLoader(
            ds_tr, batch_size=CONFIG.BATCH_SIZE, shuffle=True,
            num_workers=2, pin_memory=True
        )
        dl_va = torch.utils.data.DataLoader(
            ds_va, batch_size=CONFIG.BATCH_SIZE, shuffle=False,
            num_workers=2, pin_memory=True
        )

        print(f"Initializing model...")
        model = AutoModelForSequenceClassification.from_pretrained(
            CONFIG.MODEL_NAME,
            num_labels=len(CONFIG.LABELS),
            problem_type="multi_label_classification"
        )
        model.to(device)

        optimizer_params = get_optimizer_params(model, CONFIG.LR, CONFIG.WEIGHT_DECAY)
        optimizer = AdamW(optimizer_params, lr=CONFIG.LR)

        total_steps = len(dl_tr) * CONFIG.EPOCHS
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=int(total_steps * CONFIG.WARMUP_RATIO),
            num_training_steps=total_steps
        )

        criterion = nn.BCEWithLogitsLoss()
        scaler = GradScaler(enabled=True)

        # Start below any reachable F1 so the first epoch always records a
        # checkpoint, even if every epoch scores exactly 0.
        best_f1 = -1.0
        best_state = None
        best_val_preds = None
        best_epoch = 0

        for ep in range(CONFIG.EPOCHS):
            print(f"\nEpoch {ep+1}/{CONFIG.EPOCHS}")

            train_loss = train_one_epoch(
                model, dl_tr, optimizer, scheduler, scaler, criterion, ep, fold
            )
            val_loss, val_preds, val_targs = validate(model, dl_va, criterion)

            # Compute metrics
            val_pred_binary = (val_preds >= 0.5).astype(int)
            metrics = compute_metrics(val_targs, val_pred_binary)
            val_f1 = metrics['macro_f1']

            print(f"  Train Loss: {train_loss:.4f}")
            print(f"  Val Loss: {val_loss:.4f}")
            print(f"  Val Macro F1: {val_f1:.4f}")
            print(f"  Per-label F1:")
            for label in CONFIG.LABELS:
                print(f"    {label}: {metrics[f'{label}_f1']:.4f}")

            # Log to W&B
            wandb.log({
                f"fold_{fold}/epoch": ep,
                f"fold_{fold}/train_loss_epoch": train_loss,
                f"fold_{fold}/val_loss": val_loss,
                f"fold_{fold}/val_macro_f1": val_f1,
                **{f"fold_{fold}/val_{label}_f1": metrics[f'{label}_f1'] for label in CONFIG.LABELS}
            })

            if val_f1 > best_f1:
                best_f1 = val_f1
                # Copy the weights: keeping the state_dict itself would track
                # later epochs and always end up as the last-epoch weights.
                best_state = _snapshot_state_dict(model)
                # These are the probabilities that produced best_f1, so no
                # second validation pass is needed for the OOF matrix.
                best_val_preds = val_preds
                best_epoch = ep
                print(f"  New best F1: {best_f1:.4f}")

        print(f"\nBest validation F1 for fold {fold+1}: {best_f1:.4f} (epoch {best_epoch+1})")
        fold_scores.append(best_f1)

        # Save best model for this fold
        model_path = f"model_fold_{fold}.pth"
        torch.save(best_state, model_path)
        print(f"Model saved to {model_path}")

        # Store the best epoch's validation probabilities as OOF predictions
        oof_preds[val_idx] = best_val_preds

        # Log fold summary to W&B
        wandb.log({
            f"fold_{fold}/best_val_f1": best_f1,
            f"fold_{fold}/best_epoch": best_epoch
        })

        # Release this fold's model and checkpoint before the next fold starts.
        del model, optimizer, scaler, scheduler, best_state
        torch.cuda.empty_cache()
        gc.collect()

    # Overall CV results
    print(f"\n{'='*60}")
    print("CROSS-VALIDATION RESULTS")
    print(f"{'='*60}")
    for i, score in enumerate(fold_scores):
        print(f"Fold {i+1}: {score:.4f}")
    print(f"\nMean CV F1: {np.mean(fold_scores):.4f} +/- {np.std(fold_scores):.4f}")

    # Log overall CV results
    wandb.log({
        "cv_mean_f1": np.mean(fold_scores),
        "cv_std_f1": np.std(fold_scores),
        "cv_fold_scores": fold_scores
    })

    return oof_preds, df[CONFIG.LABELS].values, fold_scores

# Run training (only when the training CSV is actually present)
if not os.path.exists(CONFIG.TRAIN_CSV):
    print("Skipping training as data is not found.")
    oof_preds, y_true, fold_scores = None, None, None
else:
    oof_preds, y_true, fold_scores = run_training()

## 10. Threshold Optimization

Optimize the classification thresholds for each emotion label independently. Instead of using a fixed 0.5 threshold, this searches for the optimal threshold per label that maximizes the F1 score on out-of-fold predictions. This often improves performance on imbalanced datasets.

In [None]:
if oof_preds is None or y_true is None:
    # Training did not run: fall back to a 0.5 cut-off for every label.
    best_thresholds = np.array([0.5] * len(CONFIG.LABELS))
    print("Using default thresholds of 0.5")
else:
    print("\nOptimizing classification thresholds...")
    best_thresholds = tune_thresholds(y_true, oof_preds)

    # Binarise the out-of-fold probabilities with the per-label cut-offs
    oof_tuned = (oof_preds >= best_thresholds).astype(int)

    # Score the tuned predictions
    final_metrics = compute_metrics(y_true, oof_tuned)
    final_f1 = final_metrics['macro_f1']

    print(f"\n{'='*60}")
    print("FINAL RESULTS WITH OPTIMIZED THRESHOLDS")
    print(f"{'='*60}")
    print(f"\nOverall Metrics:")
    print(f"  Macro F1: {final_f1:.4f}")
    print(f"  Micro F1: {final_metrics['micro_f1']:.4f}")
    print(f"  Weighted F1: {final_metrics['weighted_f1']:.4f}")

    # One table row per label: name, chosen threshold, resulting F1
    print(f"\nPer-Label Results:")
    print(f"{'Label':<12} {'Threshold':<12} {'F1 Score':<12}")
    print("-" * 40)
    for i, label in enumerate(CONFIG.LABELS):
        print(
            f"{label:<12} {best_thresholds[i]:<12.3f} "
            f"{final_metrics[f'{label}_f1']:<12.4f}"
        )

    # Log to W&B
    wandb.log({
        "final_macro_f1": final_f1,
        "final_micro_f1": final_metrics['micro_f1'],
        "final_weighted_f1": final_metrics['weighted_f1'],
        **{f"threshold_{label}": best_thresholds[i] for i, label in enumerate(CONFIG.LABELS)},
        **{f"final_{label}_f1": final_metrics[f'{label}_f1'] for label in CONFIG.LABELS}
    })

    # Persist the thresholds so later steps can reuse them
    np.save('best_thresholds.npy', best_thresholds)
    print(f"\nThresholds saved to best_thresholds.npy")

## 11. Upload to Hugging Face Hub

Package and upload the trained model to Hugging Face Hub for easy sharing and deployment. This includes:
- Selecting the best performing fold model
- Creating a comprehensive model card with performance metrics and usage examples
- Uploading the model, tokenizer, optimized thresholds, and documentation to the repository

In [None]:
def upload_to_huggingface():
    """Upload the best fold's model, tokenizer, thresholds and model card.

    Reads the module-level results produced by earlier cells (``fold_scores``,
    ``final_f1``, ``final_metrics``, ``best_thresholds``) when they exist and
    falls back to 'N/A' / 0.5 thresholds in the model card otherwise.

    Returns:
        The repository id ``"<username>/<model name>"`` on success, or
        ``None`` when the username is unset or the upload fails.
    """
    if CONFIG.HF_USERNAME is None:
        print("HuggingFace username not set. Please set CONFIG.HF_USERNAME")
        return

    print("\nUploading model to Hugging Face Hub...")

    # The results live at module level. locals() inside a function only sees
    # this function's own variables, so they must be looked up in globals().
    module_scope = globals()
    scores = module_scope.get("fold_scores") or []
    final_f1_value = module_scope.get("final_f1")
    final_metrics_value = module_scope.get("final_metrics")
    thresholds_value = module_scope.get("best_thresholds")

    # Determine best fold based on validation scores
    if scores:
        best_fold = int(np.argmax(scores))
        print(f"  Using model from fold {best_fold+1} (F1: {scores[best_fold]:.4f})")
    else:
        best_fold = 0
        print(f"  Using model from fold 1")

    # Load the best model
    model = AutoModelForSequenceClassification.from_pretrained(
        CONFIG.MODEL_NAME,
        num_labels=len(CONFIG.LABELS),
        problem_type="multi_label_classification"
    )
    # map_location keeps loading independent of the device the checkpoint
    # was saved from.
    model.load_state_dict(torch.load(f"model_fold_{best_fold}.pth", map_location="cpu"))

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(CONFIG.MODEL_NAME)

    # Create repository
    repo_id = f"{CONFIG.HF_USERNAME}/{CONFIG.HF_MODEL_NAME}"
    print(f"  Repository: {repo_id}")

    try:
        create_repo(repo_id, exist_ok=True, private=False)
        print("  Repository created/verified")
    except Exception as e:
        print(f"  Repository might already exist: {e}")

    # Precompute all dynamic values to avoid backslashes in f-strings
    final_f1_str = f"{final_f1_value:.4f}" if final_f1_value is not None else 'N/A'

    if scores:
        cv_mean_str = f"{np.mean(scores):.4f}"
        cv_std_str = f"{np.std(scores):.4f}"
    else:
        cv_mean_str = 'N/A'
        cv_std_str = 'N/A'

    if final_metrics_value is not None:
        per_label_perf = '\n'.join(
            f"- **{label.capitalize()}:** {final_metrics_value[f'{label}_f1']:.4f}"
            for label in CONFIG.LABELS
        )
    else:
        per_label_perf = 'N/A'

    if thresholds_value is not None:
        thresholds_block = '\n'.join(
            f"- **{label.capitalize()}:** {thresholds_value[i]:.3f}"
            for i, label in enumerate(CONFIG.LABELS)
        )
        # Plain rounded floats render as a clean literal in the usage snippet
        # instead of NumPy scalar reprs or float32 rounding noise.
        thresholds_list = [round(float(t), 4) for t in thresholds_value]
    else:
        thresholds_block = 'N/A'
        thresholds_list = [0.5] * len(CONFIG.LABELS)

    device_str = 'GPU' if torch.cuda.is_available() else 'CPU'
    torch_version = torch.__version__
    transformers_version = __import__('transformers').__version__
    labels_list = list(CONFIG.LABELS)
    labels_str = ', '.join(CONFIG.LABELS)

    # Create model card
    model_card = f"""---
language: en
license: apache-2.0
tags:
- text-classification
- multi-label-classification
- emotion-classification
- deberta-v3
- pytorch
---

# Emotion Classification Model

This model classifies text into 5 emotion categories: **anger**, **fear**, **joy**, **sadness**, and **surprise**.

## Model Description

- **Base Model:** {CONFIG.MODEL_NAME}
- **Task:** Multi-label text classification
- **Labels:** {labels_str}
- **Training Strategy:** {CONFIG.N_FOLDS}-Fold Cross-Validation
- **Framework:** PyTorch + Transformers

## Performance

### Overall Metrics
- **Macro F1:** {final_f1_str}
- **Cross-Validation:** {cv_mean_str} +/- {cv_std_str}

### Per-Label Performance
{per_label_perf}

### Optimized Thresholds
{thresholds_block}

## Usage

```python
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np

# Load model and tokenizer
model = AutoModelForSequenceClassification.from_pretrained("{repo_id}")
tokenizer = AutoTokenizer.from_pretrained("{repo_id}")

# Optimized thresholds (use these for best results)
thresholds = np.array({thresholds_list})
labels = {labels_list}

# Predict emotions
def predict_emotions(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length={CONFIG.MAX_LEN})
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.sigmoid(outputs.logits).cpu().numpy()[0]
    predictions = (probs >= thresholds).astype(int)
    return {{label: (pred, prob) for label, pred, prob in zip(labels, predictions, probs)}}

# Example
text = "I am so excited about this amazing opportunity!"
result = predict_emotions(text)
print(result)
```

## Training Details

- **Optimizer:** AdamW with differential weight decay
- **Learning Rate:** {CONFIG.LR}
- **Batch Size:** {CONFIG.BATCH_SIZE}
- **Epochs:** {CONFIG.EPOCHS}
- **Max Sequence Length:** {CONFIG.MAX_LEN}
- **Warmup Ratio:** {CONFIG.WARMUP_RATIO}
- **Weight Decay:** {CONFIG.WEIGHT_DECAY}
- **Mixed Precision:** Enabled (FP16)
- **Gradient Clipping:** 1.0

## Training Infrastructure

- **Device:** {device_str}
- **Training Time:** ~{CONFIG.EPOCHS * CONFIG.N_FOLDS * 15} minutes (approximate)
- **Framework Versions:**
  - PyTorch: {torch_version}
  - Transformers: {transformers_version}

## Model Card Authors

{CONFIG.HF_USERNAME}

## Model Card Contact

For questions or feedback, please open an issue in the model repository.
"""

    # Save model card
    with open("README.md", "w", encoding="utf-8") as f:
        f.write(model_card)
    print("  Model card created")

    # Push to hub
    try:
        # Upload model card first to avoid metadata validation issues
        api = HfApi()
        print("  Uploading model card...")
        api.upload_file(
            path_or_fileobj="README.md",
            path_in_repo="README.md",
            repo_id=repo_id,
            commit_message="Add model card"
        )
        print("  Model card uploaded")

        print("  Uploading model...")
        model.push_to_hub(repo_id, commit_message="Upload emotion classification model")
        print("  Model uploaded")

        print("  Uploading tokenizer...")
        tokenizer.push_to_hub(repo_id, commit_message="Upload tokenizer")
        print("  Tokenizer uploaded")

        # The thresholds file only exists when tuning ran and saved it; its
        # absence should not undo the uploads that already succeeded.
        if os.path.exists("best_thresholds.npy"):
            print("  Uploading thresholds...")
            api.upload_file(
                path_or_fileobj="best_thresholds.npy",
                path_in_repo="best_thresholds.npy",
                repo_id=repo_id,
                commit_message="Add optimized thresholds"
            )
            print("  Thresholds uploaded")
        else:
            print("  best_thresholds.npy not found; skipping thresholds upload")

        print(f"\nModel successfully uploaded to: https://huggingface.co/{repo_id}")

    except Exception as e:
        print(f"  Error uploading to Hugging Face: {e}")
        return None

    # Log to W&B
    # Kept outside the upload try-block and guarded on an active run, so a
    # missing or failing W&B run cannot turn a successful upload into None.
    if wandb.run is not None:
        wandb.log({"huggingface_repo": repo_id})
        wandb.config.update({"huggingface_repo": repo_id})

    return repo_id

# Upload model (only when training produced out-of-fold predictions)
if oof_preds is None:
    print("Skipping HuggingFace upload as training was not completed.")
    hf_repo_id = None
else:
    hf_repo_id = upload_to_huggingface()

## 12. Save Training Summary

Create and save a comprehensive JSON summary of the training experiment. This includes all cross-validation scores, final metrics, optimized thresholds, per-label performance, and links to the HuggingFace repository and W&B run for future reference.

In [None]:
if oof_preds is not None:
    import json

    # Collect everything worth keeping from this run into one JSON-ready dict
    summary = dict(
        experiment_name=CONFIG.EXPERIMENT_NAME,
        model=CONFIG.MODEL_NAME,
        n_folds=CONFIG.N_FOLDS,
        cv_scores=fold_scores,
        cv_mean=np.mean(fold_scores),
        cv_std=np.std(fold_scores),
        final_macro_f1=final_f1,
        thresholds=best_thresholds.tolist(),
        per_label_f1={label: final_metrics[f'{label}_f1'] for label in CONFIG.LABELS},
        huggingface_repo=hf_repo_id,
        wandb_run=wandb.run.url if wandb.run else None,
    )

    # Write the summary to disk, then echo it to the cell output
    with open('training_summary.json', 'w') as f:
        json.dump(summary, f, indent=2)

    print("\n Training summary saved to training_summary.json")
    print("\n Summary:")
    print(json.dumps(summary, indent=2))

## 13. Finish W&B Run

Finalize the Weights & Biases experiment tracking run and display summary information including links to the uploaded model and experiment dashboard.

In [None]:
# Finish W&B run
# Read the URL before finishing: wandb.finish() clears wandb.run, so reading
# it afterwards would silently drop the W&B link from the final summary.
wandb_run_url = wandb.run.url if wandb.run else None
if wandb.run:
    print(f"\nView your experiment at: {wandb_run_url}")
    wandb.finish()
    print("W&B run finished")

print("\n" + "="*60)
print("TRAINING COMPLETE")
print("="*60)
if hf_repo_id:
    print(f"\nModel: https://huggingface.co/{hf_repo_id}")
if wandb_run_url:
    print(f"W&B: {wandb_run_url}")
print("\nNext: Use the submission notebook to create Kaggle predictions")