{ "cells": [ { "cell_type": "markdown", "id": "8746fae6", "metadata": { "papermill": { "duration": 0.004991, "end_time": "2025-12-02T21:50:17.265155", "exception": false, "start_time": "2025-12-02T21:50:17.260164", "status": "completed" }, "tags": [] }, "source": [ "## 1. Install Required Packages\n", "\n", "Install all necessary Python packages for the emotion classification project. This includes libraries for experiment tracking (wandb), model hosting (huggingface_hub), deep learning (transformers, torch), and data processing (scikit-learn, pandas, numpy)." ] }, { "cell_type": "code", "execution_count": null, "id": "15aa58bc", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T21:50:17.274278Z", "iopub.status.busy": "2025-12-02T21:50:17.274012Z", "iopub.status.idle": "2025-12-02T21:51:28.685152Z", "shell.execute_reply": "2025-12-02T21:51:28.684384Z" }, "papermill": { "duration": 71.417599, "end_time": "2025-12-02T21:51:28.686724", "exception": false, "start_time": "2025-12-02T21:50:17.269125", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "!pip install -q wandb huggingface_hub transformers torch scikit-learn pandas numpy" ] }, { "cell_type": "markdown", "id": "fe8bc916", "metadata": { "papermill": { "duration": 0.020591, "end_time": "2025-12-02T21:51:28.728201", "exception": false, "start_time": "2025-12-02T21:51:28.707610", "status": "completed" }, "tags": [] }, "source": [ "## 2. API Key Setup\n", "\n", "Configure authentication for Weights & Biases and HuggingFace. W&B will track our experiments and metrics during training, while HuggingFace allows us to upload and share the trained model. You'll need to provide your API keys when prompted." ] }, { "cell_type": "code", "execution_count": null, "id": "228b7b20", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T21:51:28.768325Z", "iopub.status.busy": "2025-12-02T21:51:28.768036Z", "iopub.status.idle": "2025-12-02T21:51:31.795433Z", "shell.execute_reply": "2025-12-02T21:51:31.794721Z" }, "papermill": { "duration": 3.048968, "end_time": "2025-12-02T21:51:31.796586", "exception": false, "start_time": "2025-12-02T21:51:28.747618", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import wandb\n", "from huggingface_hub import login\n", "from kaggle_secrets import UserSecretsClient\n", "\n", "user_secrets = UserSecretsClient()\n", "\n", "wb_secret = user_secrets.get_secret(\"wandb_api_key\") \n", "hf_secret = user_secrets.get_secret(\"hf_api_key\")\n", "\n", "wandb.login(key=wb_secret)\n", "login()" ] }, { "cell_type": "markdown", "id": "e2142967", "metadata": { "papermill": { "duration": 0.019644, "end_time": "2025-12-02T21:51:31.835986", "exception": false, "start_time": "2025-12-02T21:51:31.816342", "status": "completed" }, "tags": [] }, "source": [ "## 3. Imports & Setup\n", "\n", "Import all required libraries for the project. This includes data manipulation (pandas, numpy), deep learning frameworks (torch), model utilities (transformers), evaluation metrics (sklearn), and system utilities." ] }, { "cell_type": "code", "execution_count": null, "id": "651e7ca2", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T21:51:31.877936Z", "iopub.status.busy": "2025-12-02T21:51:31.877707Z", "iopub.status.idle": "2025-12-02T21:51:45.967740Z", "shell.execute_reply": "2025-12-02T21:51:45.966752Z" }, "papermill": { "duration": 14.113714, "end_time": "2025-12-02T21:51:45.969324", "exception": false, "start_time": "2025-12-02T21:51:31.855610", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import torch\n", "import torch.nn as nn\n", "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.metrics import f1_score, classification_report\n", "from transformers import (\n", " AutoTokenizer,\n", " AutoModelForSequenceClassification,\n", " get_linear_schedule_with_warmup,\n", " AutoConfig\n", ")\n", "from torch.optim import AdamW\n", "from torch.cuda.amp import autocast, GradScaler\n", "from huggingface_hub import HfApi, create_repo\n", "import gc\n", "import warnings\n", "import os\n", "from datetime import datetime\n", "\n", "warnings.filterwarnings(\"ignore\")" ] }, { "cell_type": "markdown", "id": "f340e191", "metadata": { "papermill": { "duration": 0.019008, "end_time": "2025-12-02T21:51:46.008938", "exception": false, "start_time": "2025-12-02T21:51:45.989930", "status": "completed" }, "tags": [] }, "source": [ "## 4. Configuration\n", "\n", "Define all hyperparameters and settings for the training process. This includes model selection, training parameters (learning rate, batch size, epochs), file paths, and experiment tracking configuration. All settings are centralized in a Config class for easy modification." ] }, { "cell_type": "code", "execution_count": null, "id": "fb935a4c", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T21:51:46.048845Z", "iopub.status.busy": "2025-12-02T21:51:46.048395Z", "iopub.status.idle": "2025-12-02T21:51:46.054765Z", "shell.execute_reply": "2025-12-02T21:51:46.053963Z" }, "papermill": { "duration": 0.027558, "end_time": "2025-12-02T21:51:46.055897", "exception": false, "start_time": "2025-12-02T21:51:46.028339", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "class Config:\n", " # Project Info\n", " PROJECT_NAME = \"emotion-classification-dl\"\n", " EXPERIMENT_NAME = f\"deberta-v3-large-{datetime.now().strftime('%Y%m%d-%H%M%S')}\"\n", " \n", " # Hugging Face Settings\n", " HF_USERNAME = None # Will be set automatically after login\n", " HF_MODEL_NAME = \"emotion-classifier-deberta-v3\"\n", " \n", " # Training Parameters\n", " SEED = 42\n", " LABELS = [\"anger\", \"fear\", \"joy\", \"sadness\", \"surprise\"]\n", " MODEL_NAME = \"microsoft/deberta-v3-large\"\n", " MAX_LEN = 128\n", " BATCH_SIZE = 16\n", " EPOCHS = 4\n", " LR = 1.5e-5\n", " WEIGHT_DECAY = 0.01\n", " WARMUP_RATIO = 0.1\n", " N_FOLDS = 5\n", " \n", " # Paths\n", " TRAIN_CSV = \"/kaggle/input/2025-sep-dl-gen-ai-project/train.csv\"\n", " TEST_CSV = \"/kaggle/input/2025-sep-dl-gen-ai-project/test.csv\"\n", " \n", " # Weights & Biases\n", " WANDB_PROJECT = \"emotion-classification-dl\"\n", " LOG_EVERY_N_STEPS = 50\n", "\n", "CONFIG = Config()\n", "\n", "# Get HuggingFace username\n", "try:\n", " from huggingface_hub import whoami\n", " CONFIG.HF_USERNAME = whoami()[\"name\"]\n", " print(f\"HuggingFace username: {CONFIG.HF_USERNAME}\")\n", "except:\n", " print(\"Could not fetch HuggingFace username. Please set CONFIG.HF_USERNAME manually.\")" ] }, { "cell_type": "markdown", "id": "048a66c7", "metadata": { "papermill": { "duration": 0.019658, "end_time": "2025-12-02T21:51:46.095028", "exception": false, "start_time": "2025-12-02T21:51:46.075370", "status": "completed" }, "tags": [] }, "source": [ "## 5. Seed & Device Setup\n", "\n", "Set random seeds for reproducibility across all libraries (numpy, torch, CUDA). This ensures that the model training produces consistent results across different runs. Also configure the device (GPU/CPU) for training." ] }, { "cell_type": "code", "execution_count": null, "id": "0f88d179", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T21:51:46.134867Z", "iopub.status.busy": "2025-12-02T21:51:46.134264Z", "iopub.status.idle": "2025-12-02T21:51:46.252392Z", "shell.execute_reply": "2025-12-02T21:51:46.251378Z" }, "papermill": { "duration": 0.139274, "end_time": "2025-12-02T21:51:46.253695", "exception": false, "start_time": "2025-12-02T21:51:46.114421", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def set_seed(seed=CONFIG.SEED):\n", " \"\"\"Set random seed for reproducibility.\"\"\"\n", " np.random.seed(seed)\n", " torch.manual_seed(seed)\n", " torch.cuda.manual_seed_all(seed)\n", " os.environ['PYTHONHASHSEED'] = str(seed)\n", " torch.backends.cudnn.deterministic = True\n", " torch.backends.cudnn.benchmark = False\n", "\n", "set_seed()\n", "\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "print(f\"Using device: {device}\")\n", "if torch.cuda.is_available():\n", " print(f\"GPU: {torch.cuda.get_device_name(0)}\")\n", " print(f\"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB\")" ] }, { "cell_type": "markdown", "id": "8234caf3", "metadata": { "papermill": { "duration": 0.020092, "end_time": "2025-12-02T21:51:46.294087", "exception": false, "start_time": "2025-12-02T21:51:46.273995", "status": "completed" }, "tags": [] }, "source": [ "## 6. Utility Functions\n", "\n", "Define helper functions used throughout the training process:\n", "- ensure_text_column: Automatically detects and renames text columns in the dataset\n", "- tune_thresholds: Optimizes classification thresholds for each emotion label to maximize F1 score\n", "- get_optimizer_params: Configures differential weight decay for different parameter groups\n", "- compute_metrics: Calculates comprehensive evaluation metrics including per-label and overall F1 scores" ] }, { "cell_type": "code", "execution_count": null, "id": "8a9ddcfb", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T21:51:46.336025Z", "iopub.status.busy": "2025-12-02T21:51:46.335802Z", "iopub.status.idle": "2025-12-02T21:51:46.344414Z", "shell.execute_reply": "2025-12-02T21:51:46.343855Z" }, "papermill": { "duration": 0.030393, "end_time": "2025-12-02T21:51:46.345484", "exception": false, "start_time": "2025-12-02T21:51:46.315091", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def ensure_text_column(df: pd.DataFrame) -> pd.DataFrame:\n", " \"\"\"Ensure the DataFrame has a 'text' column.\"\"\"\n", " if \"text\" in df.columns:\n", " return df\n", " for c in [\"comment_text\", \"sentence\", \"content\", \"review\"]:\n", " if c in df.columns:\n", " return df.rename(columns={c: \"text\"})\n", " raise ValueError(\"No text column found. Add/rename your text column to 'text'.\")\n", "\n", "def tune_thresholds(y_true: np.ndarray, y_prob: np.ndarray) -> np.ndarray:\n", " \"\"\"Optimize classification thresholds for each label.\"\"\"\n", " th = np.zeros(y_true.shape[1], dtype=np.float32)\n", " for j in range(y_true.shape[1]):\n", " best_t, best_f1 = 0.5, -1\n", " for t in np.linspace(0.1, 0.9, 17):\n", " f1 = f1_score(y_true[:, j], (y_prob[:, j] >= t).astype(int), zero_division=0)\n", " if f1 > best_f1:\n", " best_f1, best_t = f1, t\n", " th[j] = best_t\n", " return th\n", "\n", "def get_optimizer_params(model, lr, weight_decay):\n", " \"\"\"Get optimizer parameters with differential weight decay.\"\"\"\n", " param_optimizer = list(model.named_parameters())\n", " no_decay = [\"bias\", \"LayerNorm.bias\", \"LayerNorm.weight\"]\n", " optimizer_parameters = [\n", " {\n", " \"params\": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],\n", " \"weight_decay\": weight_decay,\n", " },\n", " {\n", " \"params\": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],\n", " \"weight_decay\": 0.0,\n", " },\n", " ]\n", " return optimizer_parameters\n", "\n", "def compute_metrics(y_true, y_pred):\n", " \"\"\"Compute detailed metrics for multi-label classification.\"\"\"\n", " metrics = {}\n", " \n", " # Overall metrics\n", " metrics['macro_f1'] = f1_score(y_true, y_pred, average='macro', zero_division=0)\n", " metrics['micro_f1'] = f1_score(y_true, y_pred, average='micro', zero_division=0)\n", " metrics['weighted_f1'] = f1_score(y_true, y_pred, average='weighted', zero_division=0)\n", " \n", " # Per-label metrics\n", " for i, label in enumerate(CONFIG.LABELS):\n", " metrics[f'{label}_f1'] = f1_score(y_true[:, i], y_pred[:, i], zero_division=0)\n", " \n", " return metrics" ] }, { "cell_type": "markdown", "id": "24cbd4c6", "metadata": { "papermill": { "duration": 0.019023, "end_time": "2025-12-02T21:51:46.383800", "exception": false, "start_time": "2025-12-02T21:51:46.364777", "status": "completed" }, "tags": [] }, "source": [ "## 7. Dataset Class\n", "\n", "Implement a PyTorch Dataset class for emotion classification. This class handles tokenization of text inputs using the transformer tokenizer, applies padding and truncation to a fixed length, and prepares the data in the format expected by the model." ] }, { "cell_type": "code", "execution_count": null, "id": "9052c10c", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T21:51:46.423323Z", "iopub.status.busy": "2025-12-02T21:51:46.423092Z", "iopub.status.idle": "2025-12-02T21:51:46.428663Z", "shell.execute_reply": "2025-12-02T21:51:46.428144Z" }, "papermill": { "duration": 0.026678, "end_time": "2025-12-02T21:51:46.429799", "exception": false, "start_time": "2025-12-02T21:51:46.403121", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "class EmotionDS(torch.utils.data.Dataset):\n", " \"\"\"Dataset class for emotion classification.\"\"\"\n", " \n", " def __init__(self, df, tokenizer, max_len, is_test=False):\n", " self.texts = df[\"text\"].tolist()\n", " self.is_test = is_test\n", " if not is_test:\n", " self.labels = df[CONFIG.LABELS].values.astype(np.float32)\n", " self.tok = tokenizer\n", " self.max_len = max_len\n", "\n", " def __len__(self):\n", " return len(self.texts)\n", "\n", " def __getitem__(self, i):\n", " enc = self.tok(\n", " self.texts[i],\n", " truncation=True,\n", " padding=\"max_length\",\n", " max_length=self.max_len,\n", " return_tensors=\"pt\",\n", " )\n", " item = {k: v.squeeze(0) for k, v in enc.items()}\n", " if not self.is_test:\n", " item[\"labels\"] = torch.tensor(self.labels[i])\n", " return item" ] }, { "cell_type": "markdown", "id": "7fc8f583", "metadata": { "papermill": { "duration": 0.019315, "end_time": "2025-12-02T21:51:46.468396", "exception": false, "start_time": "2025-12-02T21:51:46.449081", "status": "completed" }, "tags": [] }, "source": [ "## 8. Training & Validation Functions\n", "\n", "Implement the core training and validation loops:\n", "- train_one_epoch: Performs one complete pass through the training data with gradient updates, mixed precision training, gradient clipping, and logging to W&B\n", "- validate: Evaluates the model on validation data without gradient computation, returning loss and predictions for metric calculation" ] }, { "cell_type": "code", "execution_count": null, "id": "d524fa7f", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T21:51:46.508563Z", "iopub.status.busy": "2025-12-02T21:51:46.508342Z", "iopub.status.idle": "2025-12-02T21:51:46.516384Z", "shell.execute_reply": "2025-12-02T21:51:46.515644Z" }, "papermill": { "duration": 0.029545, "end_time": "2025-12-02T21:51:46.517414", "exception": false, "start_time": "2025-12-02T21:51:46.487869", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def train_one_epoch(model, loader, optimizer, scheduler, scaler, criterion, epoch, fold):\n", " \"\"\"Train for one epoch with W&B logging.\"\"\"\n", " model.train()\n", " losses = []\n", " \n", " for step, batch in enumerate(loader):\n", " batch = {k: v.to(device, non_blocking=True) for k, v in batch.items()}\n", " optimizer.zero_grad(set_to_none=True)\n", " \n", " with autocast(enabled=True):\n", " out = model(input_ids=batch[\"input_ids\"], attention_mask=batch[\"attention_mask\"])\n", " loss = criterion(out.logits, batch[\"labels\"])\n", " \n", " scaler.scale(loss).backward()\n", " scaler.unscale_(optimizer)\n", " torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n", " scaler.step(optimizer)\n", " scaler.update()\n", " scheduler.step()\n", " \n", " losses.append(loss.item())\n", " \n", " # Log to W&B\n", " if step % CONFIG.LOG_EVERY_N_STEPS == 0:\n", " wandb.log({\n", " f\"fold_{fold}/train_loss_step\": loss.item(),\n", " f\"fold_{fold}/learning_rate\": scheduler.get_last_lr()[0],\n", " f\"fold_{fold}/epoch\": epoch,\n", " \"step\": step + epoch * len(loader)\n", " })\n", " \n", " return np.mean(losses)\n", "\n", "def validate(model, loader, criterion):\n", " \"\"\"Validate the model.\"\"\"\n", " model.eval()\n", " losses = []\n", " preds = []\n", " targs = []\n", " \n", " with torch.no_grad():\n", " for batch in loader:\n", " batch = {k: v.to(device, non_blocking=True) for k, v in batch.items()}\n", " with autocast(enabled=True):\n", " out = model(input_ids=batch[\"input_ids\"], attention_mask=batch[\"attention_mask\"])\n", " loss = criterion(out.logits, batch[\"labels\"])\n", " losses.append(loss.item())\n", " preds.append(torch.sigmoid(out.logits).float().cpu().numpy())\n", " targs.append(batch[\"labels\"].cpu().numpy())\n", " \n", " return np.mean(losses), np.vstack(preds), np.vstack(targs)" ] }, { "cell_type": "markdown", "id": "79876a81", "metadata": { "papermill": { "duration": 0.01941, "end_time": "2025-12-02T21:51:46.556534", "exception": false, "start_time": "2025-12-02T21:51:46.537124", "status": "completed" }, "tags": [] }, "source": [ "## 9. Main Training Loop with K-Fold Cross-Validation\n", "\n", "Execute the complete training pipeline using stratified K-fold cross-validation. For each fold, the function:\n", "1. Splits the data into training and validation sets\n", "2. Initializes the model, optimizer, and learning rate scheduler\n", "3. Trains for the specified number of epochs\n", "4. Tracks the best model based on validation F1 score\n", "5. Saves model checkpoints and logs metrics to W&B\n", "6. Accumulates out-of-fold predictions for final evaluation" ] }, { "cell_type": "code", "execution_count": null, "id": "32a69a3d", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T21:51:46.596265Z", "iopub.status.busy": "2025-12-02T21:51:46.596031Z", "iopub.status.idle": "2025-12-02T23:08:14.788585Z", "shell.execute_reply": "2025-12-02T23:08:14.787581Z" }, "papermill": { "duration": 4588.214315, "end_time": "2025-12-02T23:08:14.790036", "exception": false, "start_time": "2025-12-02T21:51:46.575721", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def run_training():\n", " \"\"\"Main training function with K-Fold CV and W&B logging.\"\"\"\n", " \n", " if not os.path.exists(CONFIG.TRAIN_CSV):\n", " print(\"Train CSV not found. Please check the path.\")\n", " return None, None, None\n", "\n", " # Initialize W&B\n", " wandb.init(\n", " project=CONFIG.WANDB_PROJECT,\n", " name=CONFIG.EXPERIMENT_NAME,\n", " config={\n", " \"model\": CONFIG.MODEL_NAME,\n", " \"max_len\": CONFIG.MAX_LEN,\n", " \"batch_size\": CONFIG.BATCH_SIZE,\n", " \"epochs\": CONFIG.EPOCHS,\n", " \"learning_rate\": CONFIG.LR,\n", " \"weight_decay\": CONFIG.WEIGHT_DECAY,\n", " \"warmup_ratio\": CONFIG.WARMUP_RATIO,\n", " \"n_folds\": CONFIG.N_FOLDS,\n", " \"seed\": CONFIG.SEED,\n", " }\n", " )\n", " \n", " print(\"Loading data...\")\n", " df = pd.read_csv(CONFIG.TRAIN_CSV)\n", " df = ensure_text_column(df)\n", " \n", " print(f\"Data loaded: {len(df)} samples\")\n", " print(f\"Label distribution:\")\n", " for label in CONFIG.LABELS:\n", " print(f\" {label}: {df[label].sum()} ({df[label].mean()*100:.1f}%)\")\n", " \n", " # Create Stratified Folds\n", " skf = StratifiedKFold(n_splits=CONFIG.N_FOLDS, shuffle=True, random_state=CONFIG.SEED)\n", " y_str = df[CONFIG.LABELS].astype(str).agg(\"\".join, axis=1)\n", " \n", " oof_preds = np.zeros((len(df), len(CONFIG.LABELS)))\n", " fold_scores = []\n", " \n", " tokenizer = AutoTokenizer.from_pretrained(CONFIG.MODEL_NAME)\n", " \n", " for fold, (train_idx, val_idx) in enumerate(skf.split(df, y_str)):\n", " print(f\"\\n{'='*60}\")\n", " print(f\"FOLD {fold+1}/{CONFIG.N_FOLDS}\")\n", " print(f\"{'='*60}\")\n", " \n", " df_tr = df.iloc[train_idx].reset_index(drop=True)\n", " df_va = df.iloc[val_idx].reset_index(drop=True)\n", " \n", " ds_tr = EmotionDS(df_tr, tokenizer, CONFIG.MAX_LEN)\n", " ds_va = EmotionDS(df_va, tokenizer, CONFIG.MAX_LEN)\n", " \n", " dl_tr = torch.utils.data.DataLoader(\n", " ds_tr, batch_size=CONFIG.BATCH_SIZE, shuffle=True, \n", " num_workers=2, pin_memory=True\n", " )\n", " dl_va = torch.utils.data.DataLoader(\n", " ds_va, batch_size=CONFIG.BATCH_SIZE, shuffle=False, \n", " num_workers=2, pin_memory=True\n", " )\n", " \n", " print(f\"Initializing model...\")\n", " model = AutoModelForSequenceClassification.from_pretrained(\n", " CONFIG.MODEL_NAME, \n", " num_labels=len(CONFIG.LABELS),\n", " problem_type=\"multi_label_classification\"\n", " )\n", " model.to(device)\n", " \n", " optimizer_params = get_optimizer_params(model, CONFIG.LR, CONFIG.WEIGHT_DECAY)\n", " optimizer = AdamW(optimizer_params, lr=CONFIG.LR)\n", " \n", " total_steps = len(dl_tr) * CONFIG.EPOCHS\n", " scheduler = get_linear_schedule_with_warmup(\n", " optimizer, \n", " num_warmup_steps=int(total_steps * CONFIG.WARMUP_RATIO), \n", " num_training_steps=total_steps\n", " )\n", " \n", " criterion = nn.BCEWithLogitsLoss()\n", " scaler = GradScaler(enabled=True)\n", " \n", " best_f1 = 0\n", " best_state = None\n", " best_epoch = 0\n", " \n", " for ep in range(CONFIG.EPOCHS):\n", " print(f\"\\nEpoch {ep+1}/{CONFIG.EPOCHS}\")\n", " \n", " train_loss = train_one_epoch(\n", " model, dl_tr, optimizer, scheduler, scaler, criterion, ep, fold\n", " )\n", " val_loss, val_preds, val_targs = validate(model, dl_va, criterion)\n", " \n", " # Compute metrics\n", " val_pred_binary = (val_preds >= 0.5).astype(int)\n", " metrics = compute_metrics(val_targs, val_pred_binary)\n", " val_f1 = metrics['macro_f1']\n", " \n", " print(f\" Train Loss: {train_loss:.4f}\")\n", " print(f\" Val Loss: {val_loss:.4f}\")\n", " print(f\" Val Macro F1: {val_f1:.4f}\")\n", " print(f\" Per-label F1:\")\n", " for label in CONFIG.LABELS:\n", " print(f\" {label}: {metrics[f'{label}_f1']:.4f}\")\n", " \n", " # Log to W&B\n", " wandb.log({\n", " f\"fold_{fold}/epoch\": ep,\n", " f\"fold_{fold}/train_loss_epoch\": train_loss,\n", " f\"fold_{fold}/val_loss\": val_loss,\n", " f\"fold_{fold}/val_macro_f1\": val_f1,\n", " **{f\"fold_{fold}/val_{label}_f1\": metrics[f'{label}_f1'] for label in CONFIG.LABELS}\n", " })\n", " \n", " if val_f1 > best_f1:\n", " best_f1 = val_f1\n", " best_state = model.state_dict()\n", " best_epoch = ep\n", " print(f\" New best F1: {best_f1:.4f}\")\n", " \n", " print(f\"\\nBest validation F1 for fold {fold+1}: {best_f1:.4f} (epoch {best_epoch+1})\")\n", " fold_scores.append(best_f1)\n", " \n", " # Save best model for this fold\n", " model_path = f\"model_fold_{fold}.pth\"\n", " torch.save(best_state, model_path)\n", " print(f\"Model saved to {model_path}\")\n", " \n", " # Get OOF predictions with best model\n", " model.load_state_dict(best_state)\n", " _, val_preds, _ = validate(model, dl_va, criterion)\n", " oof_preds[val_idx] = val_preds\n", " \n", " # Log fold summary to W&B\n", " wandb.log({\n", " f\"fold_{fold}/best_val_f1\": best_f1,\n", " f\"fold_{fold}/best_epoch\": best_epoch\n", " })\n", " \n", " del model, optimizer, scaler, scheduler\n", " torch.cuda.empty_cache()\n", " gc.collect()\n", " \n", " # Overall CV results\n", " print(f\"\\n{'='*60}\")\n", " print(\"CROSS-VALIDATION RESULTS\")\n", " print(f\"{'='*60}\")\n", " for i, score in enumerate(fold_scores):\n", " print(f\"Fold {i+1}: {score:.4f}\")\n", " print(f\"\\nMean CV F1: {np.mean(fold_scores):.4f} +/- {np.std(fold_scores):.4f}\")\n", " \n", " # Log overall CV results\n", " wandb.log({\n", " \"cv_mean_f1\": np.mean(fold_scores),\n", " \"cv_std_f1\": np.std(fold_scores),\n", " \"cv_fold_scores\": fold_scores\n", " })\n", " \n", " return oof_preds, df[CONFIG.LABELS].values, fold_scores\n", "\n", "# Run training\n", "if os.path.exists(CONFIG.TRAIN_CSV):\n", " oof_preds, y_true, fold_scores = run_training()\n", "else:\n", " print(\"Skipping training as data is not found.\")\n", " oof_preds, y_true, fold_scores = None, None, None" ] }, { "cell_type": "markdown", "id": "d297e06a", "metadata": { "papermill": { "duration": 0.027881, "end_time": "2025-12-02T23:08:14.842769", "exception": false, "start_time": "2025-12-02T23:08:14.814888", "status": "completed" }, "tags": [] }, "source": [ "## 10. Threshold Optimization\n", "\n", "Optimize the classification thresholds for each emotion label independently. Instead of using a fixed 0.5 threshold, this searches for the optimal threshold per label that maximizes the F1 score on out-of-fold predictions. This often improves performance on imbalanced datasets." ] }, { "cell_type": "code", "execution_count": null, "id": "0e1e8e67", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T23:08:15.017898Z", "iopub.status.busy": "2025-12-02T23:08:15.016700Z", "iopub.status.idle": "2025-12-02T23:08:15.375615Z", "shell.execute_reply": "2025-12-02T23:08:15.374601Z" }, "papermill": { "duration": 0.428359, "end_time": "2025-12-02T23:08:15.377014", "exception": false, "start_time": "2025-12-02T23:08:14.948655", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "if oof_preds is not None and y_true is not None:\n", " print(\"\\nOptimizing classification thresholds...\")\n", " best_thresholds = tune_thresholds(y_true, oof_preds)\n", " \n", " # Apply optimized thresholds\n", " oof_tuned = (oof_preds >= best_thresholds).astype(int)\n", " \n", " # Compute final metrics\n", " final_metrics = compute_metrics(y_true, oof_tuned)\n", " final_f1 = final_metrics['macro_f1']\n", " \n", " print(f\"\\n{'='*60}\")\n", " print(\"FINAL RESULTS WITH OPTIMIZED THRESHOLDS\")\n", " print(f\"{'='*60}\")\n", " print(f\"\\nOverall Metrics:\")\n", " print(f\" Macro F1: {final_f1:.4f}\")\n", " print(f\" Micro F1: {final_metrics['micro_f1']:.4f}\")\n", " print(f\" Weighted F1: {final_metrics['weighted_f1']:.4f}\")\n", " \n", " print(f\"\\nPer-Label Results:\")\n", " print(f\"{'Label':<12} {'Threshold':<12} {'F1 Score':<12}\")\n", " print(\"-\" * 40)\n", " for i, label in enumerate(CONFIG.LABELS):\n", " print(f\"{label:<12} {best_thresholds[i]:<12.3f} {final_metrics[f'{label}_f1']:<12.4f}\")\n", " \n", " # Log to W&B\n", " wandb.log({\n", " \"final_macro_f1\": final_f1,\n", " \"final_micro_f1\": final_metrics['micro_f1'],\n", " \"final_weighted_f1\": final_metrics['weighted_f1'],\n", " **{f\"threshold_{label}\": best_thresholds[i] for i, label in enumerate(CONFIG.LABELS)},\n", " **{f\"final_{label}_f1\": final_metrics[f'{label}_f1'] for label in CONFIG.LABELS}\n", " })\n", " \n", " # Save thresholds\n", " np.save('best_thresholds.npy', best_thresholds)\n", " print(f\"\\nThresholds saved to best_thresholds.npy\")\n", "else:\n", " best_thresholds = np.array([0.5] * len(CONFIG.LABELS))\n", " print(\"Using default thresholds of 0.5\")" ] }, { "cell_type": "markdown", "id": "9da9fd3e", "metadata": { "papermill": { "duration": 0.022994, "end_time": "2025-12-02T23:08:15.425773", "exception": false, "start_time": "2025-12-02T23:08:15.402779", "status": "completed" }, "tags": [] }, "source": [ "## 11. Upload to Hugging Face Hub\n", "\n", "Package and upload the trained model to Hugging Face Hub for easy sharing and deployment. This includes:\n", "- Selecting the best performing fold model\n", "- Creating a comprehensive model card with performance metrics and usage examples\n", "- Uploading the model, tokenizer, optimized thresholds, and documentation to the repository" ] }, { "cell_type": "code", "execution_count": null, "id": "d5212836", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T23:08:15.471904Z", "iopub.status.busy": "2025-12-02T23:08:15.471612Z", "iopub.status.idle": "2025-12-02T23:08:15.485244Z", "shell.execute_reply": "2025-12-02T23:08:15.484621Z" }, "papermill": { "duration": 0.038319, "end_time": "2025-12-02T23:08:15.486251", "exception": false, "start_time": "2025-12-02T23:08:15.447932", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def upload_to_huggingface():\n", " \"\"\"Upload the best model to Hugging Face Hub with model card.\"\"\"\n", " \n", " if CONFIG.HF_USERNAME is None:\n", " print(\"HuggingFace username not set. Please set CONFIG.HF_USERNAME\")\n", " return\n", " \n", " print(\"\\nUploading model to Hugging Face Hub...\")\n", " \n", " # Determine best fold based on validation scores\n", " if fold_scores:\n", " best_fold = np.argmax(fold_scores)\n", " print(f\" Using model from fold {best_fold+1} (F1: {fold_scores[best_fold]:.4f})\")\n", " else:\n", " best_fold = 0\n", " print(f\" Using model from fold 1\")\n", " \n", " # Load the best model\n", " model = AutoModelForSequenceClassification.from_pretrained(\n", " CONFIG.MODEL_NAME,\n", " num_labels=len(CONFIG.LABELS),\n", " problem_type=\"multi_label_classification\"\n", " )\n", " model.load_state_dict(torch.load(f\"model_fold_{best_fold}.pth\"))\n", " \n", " # Load tokenizer\n", " tokenizer = AutoTokenizer.from_pretrained(CONFIG.MODEL_NAME)\n", " \n", " # Create repository\n", " repo_id = f\"{CONFIG.HF_USERNAME}/{CONFIG.HF_MODEL_NAME}\"\n", " print(f\" Repository: {repo_id}\")\n", " \n", " try:\n", " create_repo(repo_id, exist_ok=True, private=False)\n", " print(\" Repository created/verified\")\n", " except Exception as e:\n", " print(f\" Repository might already exist: {e}\")\n", " \n", " # Precompute all dynamic values to avoid backslashes in f-strings\n", " final_f1_str = f\"{final_f1:.4f}\" if 'final_f1' in locals() else 'N/A'\n", " \n", " if fold_scores:\n", " cv_mean_str = f\"{np.mean(fold_scores):.4f}\"\n", " cv_std_str = f\"{np.std(fold_scores):.4f}\"\n", " else:\n", " cv_mean_str = 'N/A'\n", " cv_std_str = 'N/A'\n", " \n", " if 'final_metrics' in locals():\n", " per_label_lines = []\n", " for label in CONFIG.LABELS:\n", " per_label_lines.append(f\"- **{label.capitalize()}:** {final_metrics[f'{label}_f1']:.4f}\")\n", " per_label_perf = '\\n'.join(per_label_lines)\n", " else:\n", " per_label_perf = 'N/A'\n", " \n", " if 'best_thresholds' in locals():\n", " threshold_lines = []\n", " for i, label in enumerate(CONFIG.LABELS):\n", " threshold_lines.append(f\"- **{label.capitalize()}:** {best_thresholds[i]:.3f}\")\n", " thresholds_block = '\\n'.join(threshold_lines)\n", " thresholds_list = list(best_thresholds)\n", " else:\n", " thresholds_block = 'N/A'\n", " thresholds_list = [0.5] * 5\n", " \n", " device_str = 'GPU' if torch.cuda.is_available() else 'CPU'\n", " torch_version = torch.__version__\n", " transformers_version = __import__('transformers').__version__\n", " labels_list = list(CONFIG.LABELS)\n", " labels_str = ', '.join(CONFIG.LABELS)\n", " \n", " # Create model card\n", " model_card = f\"\"\"---\n", "language: en\n", "license: apache-2.0\n", "tags:\n", "- text-classification\n", "- multi-label-classification\n", "- emotion-classification\n", "- deberta-v3\n", "- pytorch\n", "---\n", "\n", "# Emotion Classification Model\n", "\n", "This model classifies text into 5 emotion categories: **anger**, **fear**, **joy**, **sadness**, and **surprise**.\n", "\n", "## Model Description\n", "\n", "- **Base Model:** {CONFIG.MODEL_NAME}\n", "- **Task:** Multi-label text classification\n", "- **Labels:** {labels_str}\n", "- **Training Strategy:** {CONFIG.N_FOLDS}-Fold Cross-Validation\n", "- **Framework:** PyTorch + Transformers\n", "\n", "## Performance\n", "\n", "### Overall Metrics\n", "- **Macro F1:** {final_f1_str}\n", "- **Cross-Validation:** {cv_mean_str} +/- {cv_std_str}\n", "\n", "### Per-Label Performance\n", "{per_label_perf}\n", "\n", "### Optimized Thresholds\n", "{thresholds_block}\n", "\n", "## Usage\n", "\n", "```python\n", "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n", "import torch\n", "import numpy as np\n", "\n", "# Load model and tokenizer\n", "model = AutoModelForSequenceClassification.from_pretrained(\"{repo_id}\")\n", "tokenizer = AutoTokenizer.from_pretrained(\"{repo_id}\")\n", "\n", "# Optimized thresholds (use these for best results)\n", "thresholds = np.array({thresholds_list})\n", "labels = {labels_list}\n", "\n", "# Predict emotions\n", "def predict_emotions(text):\n", " inputs = tokenizer(text, return_tensors=\"pt\", truncation=True, max_length=128)\n", " with torch.no_grad():\n", " outputs = model(**inputs)\n", " probs = torch.sigmoid(outputs.logits).cpu().numpy()[0]\n", " predictions = (probs >= thresholds).astype(int)\n", " return {{label: (pred, prob) for label, pred, prob in zip(labels, predictions, probs)}}\n", "\n", "# Example\n", "text = \"I am so excited about this amazing opportunity!\"\n", "result = predict_emotions(text)\n", "print(result)\n", "```\n", "\n", "## Training Details\n", "\n", "- **Optimizer:** AdamW with differential weight decay\n", "- **Learning Rate:** {CONFIG.LR}\n", "- **Batch Size:** {CONFIG.BATCH_SIZE}\n", "- **Epochs:** {CONFIG.EPOCHS}\n", "- **Max Sequence Length:** {CONFIG.MAX_LEN}\n", "- **Warmup Ratio:** {CONFIG.WARMUP_RATIO}\n", "- **Weight Decay:** {CONFIG.WEIGHT_DECAY}\n", "- **Mixed Precision:** Enabled (FP16)\n", "- **Gradient Clipping:** 1.0\n", "\n", "## Training Infrastructure\n", "\n", "- **Device:** {device_str}\n", "- **Training Time:** ~{CONFIG.EPOCHS * CONFIG.N_FOLDS * 15} minutes (approximate)\n", "- **Framework Versions:**\n", " - PyTorch: {torch_version}\n", " - Transformers: {transformers_version}\n", "\n", "## Model Card Authors\n", "\n", "{CONFIG.HF_USERNAME}\n", "\n", "## Model Card Contact\n", "\n", "For questions or feedback, please open an issue in the model repository.\n", "\"\"\"\n", " \n", " # Save model card\n", " with open(\"README.md\", \"w\") as f:\n", " f.write(model_card)\n", " print(\" Model card created\")\n", " \n", " # Push to hub\n", " try:\n", " # Upload model card first to avoid metadata validation issues\n", " api = HfApi()\n", " print(\" Uploading model card...\")\n", " api.upload_file(\n", " path_or_fileobj=\"README.md\",\n", " path_in_repo=\"README.md\",\n", " repo_id=repo_id,\n", " commit_message=\"Add model card\"\n", " )\n", " print(\" Model card uploaded\")\n", " \n", " print(\" Uploading model...\")\n", " model.push_to_hub(repo_id, commit_message=\"Upload emotion classification model\")\n", " print(\" Model uploaded\")\n", " \n", " print(\" Uploading tokenizer...\")\n", " tokenizer.push_to_hub(repo_id, commit_message=\"Upload tokenizer\")\n", " print(\" Tokenizer uploaded\")\n", " \n", " print(\" Uploading thresholds...\")\n", " api.upload_file(\n", " path_or_fileobj=\"best_thresholds.npy\",\n", " path_in_repo=\"best_thresholds.npy\",\n", " repo_id=repo_id,\n", " commit_message=\"Add optimized thresholds\"\n", " )\n", " print(\" Thresholds uploaded\")\n", " \n", " print(f\"\\nModel successfully uploaded to: https://huggingface.co/{repo_id}\")\n", " \n", " # Log to W&B\n", " wandb.log({\"huggingface_repo\": repo_id})\n", " wandb.config.update({\"huggingface_repo\": repo_id})\n", " \n", " return repo_id\n", " \n", " except Exception as e:\n", " print(f\" Error uploading to Hugging Face: {e}\")\n", " return None\n", "\n", "# Upload model\n", "if oof_preds is not None:\n", " hf_repo_id = upload_to_huggingface()\n", "else:\n", " print(\"Skipping HuggingFace upload as training was not completed.\")\n", " hf_repo_id = None" ] }, { "cell_type": "markdown", "id": "e198cc41", "metadata": { "papermill": { "duration": 0.029407, "end_time": "2025-12-02T23:08:15.538894", "exception": false, "start_time": "2025-12-02T23:08:15.509487", "status": "completed" }, "tags": [] }, "source": [ "## 12. Save Training Summary\n", "\n", "Create and save a comprehensive JSON summary of the training experiment. This includes all cross-validation scores, final metrics, optimized thresholds, per-label performance, and links to the HuggingFace repository and W&B run for future reference." ] }, { "cell_type": "code", "execution_count": null, "id": "536214aa", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T23:08:15.589409Z", "iopub.status.busy": "2025-12-02T23:08:15.589124Z", "iopub.status.idle": "2025-12-02T23:08:15.596763Z", "shell.execute_reply": "2025-12-02T23:08:15.595879Z" }, "papermill": { "duration": 0.032272, "end_time": "2025-12-02T23:08:15.597815", "exception": false, "start_time": "2025-12-02T23:08:15.565543", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "if oof_preds is not None:\n", " # Create training summary\n", " summary = {\n", " \"experiment_name\": CONFIG.EXPERIMENT_NAME,\n", " \"model\": CONFIG.MODEL_NAME,\n", " \"n_folds\": CONFIG.N_FOLDS,\n", " \"cv_scores\": fold_scores,\n", " \"cv_mean\": np.mean(fold_scores),\n", " \"cv_std\": np.std(fold_scores),\n", " \"final_macro_f1\": final_f1,\n", " \"thresholds\": best_thresholds.tolist(),\n", " \"per_label_f1\": {label: final_metrics[f'{label}_f1'] for label in CONFIG.LABELS},\n", " \"huggingface_repo\": hf_repo_id,\n", " \"wandb_run\": wandb.run.url if wandb.run else None,\n", " }\n", " \n", " import json\n", " with open('training_summary.json', 'w') as f:\n", " json.dump(summary, f, indent=2)\n", " \n", " print(\"\\n Training summary saved to training_summary.json\")\n", " print(\"\\n Summary:\")\n", " print(json.dumps(summary, indent=2))" ] }, { "cell_type": "markdown", "id": "cd2be63d", "metadata": { "papermill": { "duration": 0.022472, "end_time": "2025-12-02T23:08:15.643687", "exception": false, "start_time": "2025-12-02T23:08:15.621215", "status": "completed" }, "tags": [] }, "source": [ "## 13. Finish W&B Run\n", "\n", "Finalize the Weights & Biases experiment tracking run and display summary information including links to the uploaded model and experiment dashboard." ] }, { "cell_type": "code", "execution_count": null, "id": "410eb83b", "metadata": { "execution": { "iopub.execute_input": "2025-12-02T23:08:15.690482Z", "iopub.status.busy": "2025-12-02T23:08:15.690012Z", "iopub.status.idle": "2025-12-02T23:08:16.168743Z", "shell.execute_reply": "2025-12-02T23:08:16.167804Z" }, "papermill": { "duration": 0.50344, "end_time": "2025-12-02T23:08:16.170023", "exception": false, "start_time": "2025-12-02T23:08:15.666583", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Finish W&B run\n", "if wandb.run:\n", " print(f\"\\nView your experiment at: {wandb.run.url}\")\n", " wandb.finish()\n", " print(\"W&B run finished\")\n", "\n", "print(\"\\n\" + \"=\"*60)\n", "print(\"TRAINING COMPLETE\")\n", "print(\"=\"*60)\n", "if hf_repo_id:\n", " print(f\"\\nModel: https://huggingface.co/{hf_repo_id}\")\n", "if wandb.run:\n", " print(f\"W&B: {wandb.run.url}\")\n", "print(\"\\nNext: Use the submission notebook to create Kaggle predictions\")" ] } ], "metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [ { "databundleVersionId": 13800781, "sourceId": 115439, "sourceType": "competition" } ], "dockerImageVersionId": 31193, "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" }, "papermill": { "default_parameters": {}, "duration": 4685.493997, "end_time": "2025-12-02T23:08:19.013597", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2025-12-02T21:50:13.519600", "version": "2.6.0" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": { "0106dbdd2ba04fcb9265829364e9fa86": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "2.0.0", "_view_name": "HTMLView", "description": "", "description_allow_html": false, "layout": "IPY_MODEL_ff14fb3db5d24ad99736c82ac409c4a2", "placeholder": "", "style": "IPY_MODEL_029db6685a984cacacc3c434eeecd35e", "tabbable": null, "tooltip": null, "value": "pytorch_model.bin: 100%" } }, "029db6685a984cacacc3c434eeecd35e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HTMLStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HTMLStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "StyleView", "background": null, "description_width": "", "font_size": null, "text_color": null } }, "049b4a2008fe400e9b681afc91ef83fa": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": "center", "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": "flex", "flex": null, "flex_flow": "column", "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "50%" } }, "069583cf6e8b432881abc7e0d02e1b7e": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "07e5fae79cfb4a7fb37dcb0840238572": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HTMLStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HTMLStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "StyleView", "background": null, "description_width": "", "font_size": null, "text_color": null } }, "091944a3e55241ee94fa68fde6c329a1": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0ac432a9fd2e4ccb90283152bd59d8a1": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "2.0.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a9f314d420a24dbcbe264865a9efc3a0", "IPY_MODEL_fc06462f0f7e401e89e738ce16f15080", "IPY_MODEL_27089787f99c4085bbb758cc502e440c" ], "layout": "IPY_MODEL_366eaef8c5d34a5d9ee56a0a21a86556", "tabbable": null, "tooltip": null } }, "0c2f1d1b3a734769ba43838c156dea96": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HTMLStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HTMLStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "StyleView", "background": null, "description_width": "", "font_size": null, "text_color": null } }, "0d001427218e4d8ea4ac90c47cbb6ad4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "2.0.0", "_view_name": "HTMLView", "description": "", "description_allow_html": false, "layout": "IPY_MODEL_946fe7888ef8424a864c49e8d3fa1827", "placeholder": "", "style": "IPY_MODEL_ae1b4e060d39475db575f2aee021d97c", "tabbable": null, "tooltip": null, "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. " } }, "10f5aa2d2b3b4a478271e81ca29c45b3": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "129bb7b77bf84e6d9cb384e84a875f0a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "2.0.0", "_view_name": "HTMLView", "description": "", "description_allow_html": false, "layout": "IPY_MODEL_2002ec62d11f4cd48d605779848dbb98", "placeholder": "", "style": "IPY_MODEL_f92e3a4990824985b32bc22144bda268", "tabbable": null, "tooltip": null, "value": "config.json: 100%" } }, "136b397a7b024e5f9b89ccde756c7c09": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "167318755c7746dbb59ef9241b8c12ad": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HTMLStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HTMLStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "StyleView", "background": null, "description_width": "", "font_size": null, "text_color": null } }, "16db4c1eaf8146eea28be07e122410dc": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "177e148a2c1a42e2affb19b3c97f3824": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HTMLStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HTMLStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "StyleView", "background": null, "description_width": "", "font_size": null, "text_color": null } }, "19aff18ca92e4592b3dd2136d71e26cf": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "2.0.0", "_view_name": "HTMLView", "description": "", "description_allow_html": false, "layout": "IPY_MODEL_9c6bbc78f63f4d48a10336aa681dd3a7", "placeholder": "", "style": "IPY_MODEL_5563066744da435c930bcc5f6e67f714", "tabbable": null, "tooltip": null, "value": " 2.46M/2.46M [00:00<00:00, 7.29MB/s]" } }, "1dc60e1ec68848b38a4b9357395e86c7": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "ButtonStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "ButtonStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "StyleView", "button_color": null, "font_family": null, "font_size": null, "font_style": null, "font_variant": null, "font_weight": null, "text_color": null, "text_decoration": null } }, "2002ec62d11f4cd48d605779848dbb98": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2191d49677ae46c7b12116813d547ff7": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "24ab545e5c6b478695d295507ed27457": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "27089787f99c4085bbb758cc502e440c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "2.0.0", "_view_name": "HTMLView", "description": "", "description_allow_html": false, "layout": "IPY_MODEL_7fbedc97a134458984b41809b7a39fa3", "placeholder": "", "style": "IPY_MODEL_860e6cfa0e844cca826506a4045bf512", "tabbable": null, "tooltip": null, "value": " 52.0/52.0 [00:00<00:00, 6.65kB/s]" } }, "2983dac9fcc9472f8c33ad42d670b6a4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "PasswordModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "PasswordModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "2.0.0", "_view_name": "PasswordView", "continuous_update": true, "description": "Token:", "description_allow_html": false, "disabled": false, "layout": "IPY_MODEL_24ab545e5c6b478695d295507ed27457", "placeholder": "", "style": "IPY_MODEL_4e7fcf5752944319991cb57e33e90cd9", "tabbable": null, "tooltip": null, "value": "" } }, "2cb3e871734a4251a7a1114608de6846": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HTMLStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HTMLStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "StyleView", "background": null, "description_width": "", "font_size": null, "text_color": null } }, "366eaef8c5d34a5d9ee56a0a21a86556": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "2.0.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "2.0.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border_bottom": null, "border_left": null, "border_right": null, "border_top": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3bc88060e02a409fae3c7e8fe9e23112": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "2.0.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_allow_html": false, "layout": "IPY_MODEL_89341038afd24b7788a39a54f82f6f06", "max": 873673253, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_ee8809fdbdd14d9a939fd09a2d800569", "tabbable": null, "tooltip": null, "value": 873673253 } }, "42621cc97ce64850b68519df2c852723": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "2.0.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "2.0.0", "_view_name": "HTMLView", "description": "", "description_allow_html": false, "layout": "IPY_MODEL_bfcb6c8a947a42c29ff6dae194769252", "placeholder": "", "style": "IPY_MODEL_0c2f1d1b3a734769ba43838c156dea96", "tabbable": null, "tooltip": null, "value": "